[llvm-commits] [llvm] r58227 - /llvm/trunk/lib/Transforms/Utils/PromoteMemoryToRegister.cpp

Chris Lattner sabre at nondot.org
Sun Oct 26 23:05:26 PDT 2008


Author: lattner
Date: Mon Oct 27 01:05:26 2008
New Revision: 58227

URL: http://llvm.org/viewvc/llvm-project?rev=58227&view=rev
Log:
Add a new LargeBlockInfo helper, which is just a wrapper around
a trivial dense map.  Use this in RewriteSingleStoreAlloca to
avoid rescanning the same large blocks over and over again.  This
fixes PR2925, speeding up mem2reg on the testcase in that bug
from 4.56s to 0.02s in a debug build on my machine.
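
[Editor's note: to make the mechanism concrete, here is a minimal
standalone sketch of the lazy numbering pattern LargeBlockInfo
implements.  The names Inst and BlockIndex are hypothetical stand-ins
for LLVM's Instruction and DenseMap, not the patch itself: the first
ordering query into a block numbers every interesting instruction in
that block in a single pass, and every later query is a plain map
lookup.]

// Minimal sketch of the lazy per-block numbering that LargeBlockInfo
// provides.  A "basic block" is modeled as a vector of instructions.
#include <cassert>
#include <unordered_map>
#include <vector>

struct Inst {
  const std::vector<Inst> *Parent = nullptr; // containing "basic block"
  bool Interesting = false; // stands in for the load/store-of-alloca check
};

class BlockIndex {
  std::unordered_map<const Inst *, unsigned> InstNumbers;

public:
  unsigned getInstructionIndex(const Inst *I) {
    assert(I->Interesting && "only interesting instructions are numbered");

    // Fast path: this instruction was numbered by an earlier query.
    auto It = InstNumbers.find(I);
    if (It != InstNumbers.end())
      return It->second;

    // Slow path: one linear scan numbers every interesting instruction in
    // I's block, so all later queries into this block are O(1) lookups.
    unsigned InstNo = 0;
    for (const Inst &BI : *I->Parent)
      if (BI.Interesting)
        InstNumbers[&BI] = InstNo++;

    return InstNumbers.at(I);
  }

  // Mirror the patch: erase entries for deleted instructions, and reset
  // the whole index once block-local promotion is finished.
  void deleteValue(const Inst *I) { InstNumbers.erase(I); }
  void clear() { InstNumbers.clear(); }
};

int main() {
  std::vector<Inst> BB(6);
  for (Inst &I : BB) { I.Parent = &BB; I.Interesting = true; }

  BlockIndex LBI;
  // The first call scans BB once; the second is a pure hash lookup.
  assert(LBI.getInstructionIndex(&BB[1]) < LBI.getInstructionIndex(&BB[4]));
}

[Before this change, each ordering question in RewriteSingleStoreAlloca
triggered a fresh scan from the top of the block, which is roughly
quadratic when one huge block uses many allocas; amortizing to one scan
per block is what takes the PR2925 testcase from 4.56s to 0.02s.]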

Modified:
    llvm/trunk/lib/Transforms/Utils/PromoteMemoryToRegister.cpp

Modified: llvm/trunk/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/PromoteMemoryToRegister.cpp?rev=58227&r1=58226&r2=58227&view=diff

==============================================================================
--- llvm/trunk/lib/Transforms/Utils/PromoteMemoryToRegister.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/PromoteMemoryToRegister.cpp Mon Oct 27 01:05:26 2008
@@ -106,6 +106,58 @@
       Values.swap(RHS.Values);
     }
   };
+  
+  /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of
+  /// load/store instructions in the block that directly load or store an alloca.
+  ///
+  /// This functionality is important because it avoids scanning large basic
+  /// blocks multiple times when promoting many allocas in the same block.
+  class VISIBILITY_HIDDEN LargeBlockInfo {
+    /// InstNumbers - For each instruction that we track, keep the index of the
+    /// instruction.  The index starts out as the number of the instruction from
+    /// the start of the block.
+    DenseMap<const Instruction *, unsigned> InstNumbers;
+  public:
+    
+    /// isInterestingInstruction - This code only looks at accesses to allocas.
+    static bool isInterestingInstruction(const Instruction *I) {
+      return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
+             (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
+    }
+    
+    /// getInstructionIndex - Get or calculate the index of the specified
+    /// instruction.
+    unsigned getInstructionIndex(const Instruction *I) {
+      assert(isInterestingInstruction(I) &&
+             "Not a load/store to/from an alloca?");
+      
+      // If we already have this instruction number, return it.
+      DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
+      if (It != InstNumbers.end()) return It->second;
+      
+      // Scan the whole block to get the instruction.  This accumulates
+      // information for every interesting instruction in the block, in order to
+      // avoid gratuitous rescans.
+      const BasicBlock *BB = I->getParent();
+      unsigned InstNo = 0;
+      for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end();
+           BBI != E; ++BBI)
+        if (isInterestingInstruction(BBI))
+          InstNumbers[BBI] = InstNo++;
+      It = InstNumbers.find(I);
+      
+      assert(It != InstNumbers.end() && "Didn't insert instruction?");
+      return It->second;
+    }
+    
+    void deleteValue(const Instruction *I) {
+      InstNumbers.erase(I);
+    }
+    
+    void clear() {
+      InstNumbers.clear();
+    }
+  };
 
   struct VISIBILITY_HIDDEN PromoteMem2Reg {
     /// Allocas - The alloca instructions being promoted.
@@ -189,11 +241,14 @@
                              const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
                              SmallPtrSet<BasicBlock*, 32> &LiveInBlocks);
     
-    void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info);
+    void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
+                                  LargeBlockInfo &LBI);
 
-    bool PromoteLocallyUsedAlloca(BasicBlock *BB, AllocaInst *AI);
+    bool PromoteLocallyUsedAlloca(BasicBlock *BB, AllocaInst *AI,
+                                  LargeBlockInfo &LBI);
     void PromoteLocallyUsedAllocas(BasicBlock *BB,
-                                   const std::vector<AllocaInst*> &AIs);
+                                   const std::vector<AllocaInst*> &AIs,
+                                   LargeBlockInfo &LBI);
 
     void RenamePass(BasicBlock *BB, BasicBlock *Pred,
                     RenamePassData::ValVector &IncVals,
@@ -254,7 +309,6 @@
       }
     }
   };
-
 }  // end of anonymous namespace
 
 
@@ -271,6 +325,7 @@
   if (AST) PointerAllocaValues.resize(Allocas.size());
 
   AllocaInfo Info;
+  LargeBlockInfo LBI;
 
   for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
     AllocaInst *AI = Allocas[AllocaNum];
@@ -298,14 +353,17 @@
     // If there is only a single store to this value, replace any loads of
     // it that are directly dominated by the definition with the value stored.
     if (Info.DefiningBlocks.size() == 1) {
-      RewriteSingleStoreAlloca(AI, Info);
+      RewriteSingleStoreAlloca(AI, Info, LBI);
 
       // Finally, after the scan, check to see if the store is all that is left.
       if (Info.UsingBlocks.empty()) {
         // Remove the (now dead) store and alloca.
         Info.OnlyStore->eraseFromParent();
+        LBI.deleteValue(Info.OnlyStore);
+
         if (AST) AST->deleteValue(AI);
         AI->eraseFromParent();
+        LBI.deleteValue(AI);
         
         // The alloca has been processed, move on.
         RemoveFromAllocasList(AllocaNum);
@@ -342,7 +400,7 @@
     AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
 
     // At this point, we're committed to promoting the alloca using IDF's, and
-    // the standard SSA construction algorithm.  Determine which blocks need phi
+    // the standard SSA construction algorithm.  Determine which blocks need PHI
     // nodes and see if we can optimize out some work by avoiding insertion of
     // dead phi nodes.
     DetermineInsertionPoint(AI, AllocaNum, Info);
@@ -358,19 +416,22 @@
     // efficiently.
     if (LocAllocas.size() == 1) {
       // If we can do the quick promotion pass, do so now.
-      if (PromoteLocallyUsedAlloca(I->first, LocAllocas[0]))
+      if (PromoteLocallyUsedAlloca(I->first, LocAllocas[0], LBI))
         RetryList.push_back(LocAllocas[0]);  // Failed, retry later.
     } else {
       // Locally promote anything possible.  Note that if this is unable to
       // promote a particular alloca, it puts the alloca onto the Allocas vector
       // for global processing.
-      PromoteLocallyUsedAllocas(I->first, LocAllocas);
+      PromoteLocallyUsedAllocas(I->first, LocAllocas, LBI);
     }
   }
 
   if (Allocas.empty())
     return; // All of the allocas must have been trivial!
 
+  LBI.clear();
+  
+  
   // Set the incoming values for the basic block to be null values for all of
   // the alloca's.  We do this in case there is a load of a value that has not
   // been stored yet.  In this case, it will get this null value.
@@ -630,67 +691,63 @@
     DFBlocks.clear();
   }
 }
-  
 
 /// RewriteSingleStoreAlloca - If there is only a single store to this value,
 /// replace any loads of it that are directly dominated by the definition with
 /// the value stored.
 void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
-                                              AllocaInfo &Info) {
+                                              AllocaInfo &Info,
+                                              LargeBlockInfo &LBI) {
   StoreInst *OnlyStore = Info.OnlyStore;
   bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
+  BasicBlock *StoreBB = OnlyStore->getParent();
+  int StoreIndex = -1;
+
+  // Clear out UsingBlocks.  We will reconstruct it here if needed.
+  Info.UsingBlocks.clear();
   
-  // Be aware of loads before the store.
-  SmallPtrSet<BasicBlock*, 32> ProcessedBlocks;
-  for (unsigned i = 0, e = Info.UsingBlocks.size(); i != e; ++i) {
-    BasicBlock *UseBlock = Info.UsingBlocks[i];
-    
-    // If we already processed this block, don't reprocess it.
-    if (!ProcessedBlocks.insert(UseBlock)) {
-      Info.UsingBlocks[i] = Info.UsingBlocks.back();
-      Info.UsingBlocks.pop_back();
-      --i; --e;
+  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) {
+    Instruction *UserInst = cast<Instruction>(*UI++);
+    if (!isa<LoadInst>(UserInst)) {
+      assert(UserInst == OnlyStore && "Should only have load/stores");
       continue;
     }
+    LoadInst *LI = cast<LoadInst>(UserInst);
     
-    // If the store dominates the block and if we haven't processed it yet,
-    // do so now.  We can't handle the case where the store doesn't dominate a
-    // block because there may be a path between the store and the use, but we
-    // may need to insert phi nodes to handle dominance properly.
-    if (!StoringGlobalVal && !dominates(OnlyStore->getParent(), UseBlock))
-      continue;
-    
-    // If the use and store are in the same block, do a quick scan to
-    // verify that there are no uses before the store.
-    if (UseBlock == OnlyStore->getParent()) {
-      BasicBlock::iterator I = UseBlock->begin();
-      for (; &*I != OnlyStore; ++I) { // scan block for store.
-        if (isa<LoadInst>(I) && I->getOperand(0) == AI)
-          break;
-      }
-      if (&*I != OnlyStore)
-        continue;  // Do not promote the uses of this in this block.
-    }
-    
-    // Otherwise, if this is a different block or if all uses happen
-    // after the store, do a simple linear scan to replace loads with
-    // the stored value.
-    for (BasicBlock::iterator I = UseBlock->begin(), E = UseBlock->end();
-         I != E; ) {
-      if (LoadInst *LI = dyn_cast<LoadInst>(I++)) {
-        if (LI->getOperand(0) == AI) {
-          LI->replaceAllUsesWith(OnlyStore->getOperand(0));
-          if (AST && isa<PointerType>(LI->getType()))
-            AST->deleteValue(LI);
-          LI->eraseFromParent();
+    // Okay, if we have a load from the alloca, we want to replace it with the
+    // only value stored to the alloca.  We can do this if the load is
+    // dominated by the store.  If not, we use the rest of the mem2reg machinery
+    // to insert the phi nodes as needed.
+    if (!StoringGlobalVal) {  // Non-instructions are always dominated.
+      if (LI->getParent() == StoreBB) {
+        // If we have a use that is in the same block as the store, compare the
+        // indices of the two instructions to see which one came first.  If the
+        // load came before the store, we can't handle it.
+        if (StoreIndex == -1)
+          StoreIndex = LBI.getInstructionIndex(OnlyStore);
+
+        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
+          // Can't handle this load, bail out.
+          Info.UsingBlocks.push_back(StoreBB);
+          continue;
         }
+        
+      } else if (LI->getParent() != StoreBB &&
+                 !dominates(StoreBB, LI->getParent())) {
+        // If the load and store are in different blocks, use BB dominance to
+        // check their relationships.  If the store doesn't dominate the use,
+        // bail out.
+        Info.UsingBlocks.push_back(LI->getParent());
+        continue;
       }
     }
     
-    // Finally, remove this block from the UsingBlock set.
-    Info.UsingBlocks[i] = Info.UsingBlocks.back();
-    Info.UsingBlocks.pop_back();
-    --i; --e;
+    // Otherwise, we *can* safely rewrite this load.
+    LI->replaceAllUsesWith(OnlyStore->getOperand(0));
+    if (AST && isa<PointerType>(LI->getType()))
+      AST->deleteValue(LI);
+    LI->eraseFromParent();
+    LBI.deleteValue(LI);
   }
 }
 
@@ -709,7 +766,8 @@
 ///
 /// ... so long as A is not used before undef is set.
 ///
-bool PromoteMem2Reg::PromoteLocallyUsedAlloca(BasicBlock *BB, AllocaInst *AI) {
+bool PromoteMem2Reg::PromoteLocallyUsedAlloca(BasicBlock *BB, AllocaInst *AI,
+                                              LargeBlockInfo &LBI) {
   assert(!AI->use_empty() && "There are no uses of the alloca!");
 
   // Handle degenerate cases quickly.
@@ -724,6 +782,7 @@
       // Otherwise it must be a store which is never read.
       assert(isa<StoreInst>(U));
     }
+    LBI.deleteValue(U);
     BB->getInstList().erase(U);
   } else {
     // Uses of the uninitialized memory location shall get undef.
@@ -740,12 +799,14 @@
           if (AST && isa<PointerType>(LI->getType()))
             AST->deleteValue(LI);
           BB->getInstList().erase(LI);
+          LBI.deleteValue(LI);
         }
       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
         if (SI->getOperand(1) == AI) {
           // Store updates the "current value"...
           CurVal = SI->getOperand(0);
           BB->getInstList().erase(SI);
+          LBI.deleteValue(SI);
         }
       }
     }
@@ -756,7 +817,8 @@
   assert(AI->use_empty() && "Uses of alloca from more than one BB??");
   if (AST) AST->deleteValue(AI);
   AI->eraseFromParent();
-  
+  LBI.deleteValue(AI);
+
   ++NumLocalPromoted;
   return false;
 }
@@ -767,7 +829,8 @@
 /// basic blocks, as we don't want to rescan the entire basic block for each
 /// alloca which is locally used in it (which might be a lot).
 void PromoteMem2Reg::
-PromoteLocallyUsedAllocas(BasicBlock *BB, const std::vector<AllocaInst*> &AIs) {
+PromoteLocallyUsedAllocas(BasicBlock *BB, const std::vector<AllocaInst*> &AIs,
+                          LargeBlockInfo &LBI) {
   DenseMap<AllocaInst*, Value*> CurValues;
   for (unsigned i = 0, e = AIs.size(); i != e; ++i)
     CurValues[AIs[i]] = 0; // Insert with null value
@@ -791,6 +854,7 @@
             if (AST && isa<PointerType>(LI->getType()))
               AST->deleteValue(LI);
             BB->getInstList().erase(LI);
+            LBI.deleteValue(LI);
           }
         }
       }
@@ -801,6 +865,7 @@
           // Store updates the "current value"...
           AIt->second = SI->getOperand(0);
           SI->eraseFromParent();
+          LBI.deleteValue(SI);
         }
       }
     }
@@ -813,6 +878,7 @@
     assert(AI->use_empty() && "Uses of alloca from more than one BB??");
     if (AST) AST->deleteValue(AI);
     AI->eraseFromParent();
+    LBI.deleteValue(AI);
   }
 
   NumLocalPromoted += CurValues.size();
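
[Editor's note: to spell out the decision the rewritten
RewriteSingleStoreAlloca makes per load, here is a self-contained model
of the three-way test in the hunk above.  Use, Action, and decide are
hypothetical names for illustration only, not LLVM API.]

// Standalone model of the per-load decision: a load of a single-store
// alloca is rewritten to the stored value only when the store is known
// to execute first; otherwise its block is deferred to PHI insertion.
enum class Action { Rewrite, DeferToPhi };

struct Use {
  bool SameBlockAsStore;  // load lives in the store's block
  bool AfterStore;        // LargeBlockInfo index comparison (same block)
  bool StoreDominatesUse; // dominator-tree query (different blocks)
};

Action decide(const Use &U, bool StoringGlobalVal) {
  // A stored non-instruction (constant, global, argument) dominates
  // every use, so no ordering check is needed.
  if (StoringGlobalVal)
    return Action::Rewrite;
  if (U.SameBlockAsStore)
    // Compare cached instruction indices: a load that precedes the store
    // can't be rewritten, so its block goes back onto UsingBlocks.
    return U.AfterStore ? Action::Rewrite : Action::DeferToPhi;
  // Different blocks: fall back to basic-block dominance.
  return U.StoreDominatesUse ? Action::Rewrite : Action::DeferToPhi;
}

int main() {
  Use LoadBeforeStore{true, false, false};
  return decide(LoadBeforeStore, false) == Action::DeferToPhi ? 0 : 1;
}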