[llvm-commits] [llvm] r61022 - in /llvm/trunk: include/llvm/Analysis/MemoryDependenceAnalysis.h lib/Analysis/MemoryDependenceAnalysis.cpp test/Transforms/GVN/rle-must-alias.ll test/Transforms/GVN/rle-no-phi-translate.ll test/Transforms/GVN/rle-phi-translate.ll

Chris Lattner sabre at nondot.org
Sun Dec 14 19:35:32 PST 2008


Author: lattner
Date: Sun Dec 14 21:35:32 2008
New Revision: 61022

URL: http://llvm.org/viewvc/llvm-project?rev=61022&view=rev
Log:
Implement initial support for PHI translation in memdep.  This means that
memdep keeps track of how PHIs affect the pointer in dep queries, which 
allows it to eliminate the load in cases like rle-phi-translate.ll, which
basically end up being:

BB1:
   X = load P
   br BB3
BB2:
   Y = load Q
   br BB3
BB3:
   R = phi [P] [Q]
   load R

turning "load R" into a phi of X/Y.  In addition to additional exposed
opportunities, this makes memdep safe in many cases that it wasn't before
(which is required for load PRE) and also makes it substantially more 
efficient.  For example, consider:


bb1:  // has many predecessors.
   P = some_operator()
   load P

In this example, previously memdep would scan all the predecessors of BB1
to see if they had something that would mustalias P.  In some cases (e.g.
test/Transforms/GVN/rle-must-alias.ll) it would actually find them and end
up eliminating something.  In many other cases though, it would scan and not
find anything useful.  MemDep now stops at a block if the pointer is defined
in that block and cannot be phi translated to predecessors.  This causes it
to miss the (rare) cases like rle-must-alias.ll, but makes it faster by not
scanning tons of stuff that is unlikely to be useful.  For example, this
speeds up GVN as a whole from 3.928s to 2.448s (60%)!.  IMO, scalar GVN 
should be enhanced to simplify the rle-must-alias pointer base anyway, which
would allow the loads to be eliminated.

In the future, this should be enhanced to phi translate through geps and 
bitcasts as well (as indicated by FIXMEs) making memdep even more powerful.


Added:
    llvm/trunk/test/Transforms/GVN/rle-phi-translate.ll
Modified:
    llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h
    llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp
    llvm/trunk/test/Transforms/GVN/rle-must-alias.ll
    llvm/trunk/test/Transforms/GVN/rle-no-phi-translate.ll

Modified: llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h?rev=61022&r1=61021&r2=61022&view=diff

==============================================================================
--- llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h (original)
+++ llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h Sun Dec 14 21:35:32 2008
@@ -156,12 +156,18 @@
     /// ValueIsLoadPair - This is a pair<Value*, bool> where the bool is true if
     /// the dependence is a read only dependence, false if read/write.
     typedef PointerIntPair<Value*, 1, bool> ValueIsLoadPair;
-    
+
+    /// BBSkipFirstBlockPair - This pair is used when caching information for a
+    /// block.  If the pointer is null, the cache value is not a full query that
+    /// starts at the specified block.  If non-null, the bool indicates whether
+    /// or not the contents of the block was skipped.
+    typedef PointerIntPair<BasicBlock*, 1, bool> BBSkipFirstBlockPair;
+
     /// CachedNonLocalPointerInfo - This map stores the cached results of doing
     /// a pointer lookup at the bottom of a block.  The key of this map is the
     /// pointer+isload bit, the value is a list of <bb->result> mappings.
-    typedef DenseMap<ValueIsLoadPair,
-      std::pair<BasicBlock*, NonLocalDepInfo> > CachedNonLocalPointerInfo;
+    typedef DenseMap<ValueIsLoadPair, std::pair<BBSkipFirstBlockPair, 
+                  NonLocalDepInfo> > CachedNonLocalPointerInfo;
     CachedNonLocalPointerInfo NonLocalPointerDeps;
 
     // A map from instructions to their non-local pointer dependencies.
@@ -259,10 +265,11 @@
     MemDepResult getCallSiteDependencyFrom(CallSite C, bool isReadOnlyCall,
                                            BasicBlock::iterator ScanIt,
                                            BasicBlock *BB);
-    void getNonLocalPointerDepFromBB(Value *Pointer, uint64_t Size,
+    bool getNonLocalPointerDepFromBB(Value *Pointer, uint64_t Size,
                                      bool isLoad, BasicBlock *BB,
                                      SmallVectorImpl<NonLocalDepEntry> &Result,
-                                     SmallPtrSet<BasicBlock*, 64> &Visited);
+                                     DenseMap<BasicBlock*, Value*> &Visited,
+                                     bool SkipFirstBlock = false);
     MemDepResult GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
                                          bool isLoad, BasicBlock *BB,
                                          NonLocalDepInfo *Cache,

Modified: llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp?rev=61022&r1=61021&r2=61022&view=diff

==============================================================================
--- llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp (original)
+++ llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp Sun Dec 14 21:35:32 2008
@@ -382,8 +382,7 @@
   
   // isReadonlyCall - If this is a read-only call, we can be more aggressive.
   bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);
-  
-  // Visited checked first, vector in sorted order.
+
   SmallPtrSet<BasicBlock*, 64> Visited;
   
   unsigned NumSortedEntries = Cache.size();
@@ -487,10 +486,16 @@
   const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType();
   uint64_t PointeeSize = TD->getTypeStoreSize(EltTy);
   
-  // While we have blocks to analyze, get their values.
-  SmallPtrSet<BasicBlock*, 64> Visited;
-  getNonLocalPointerDepFromBB(Pointer, PointeeSize, isLoad, FromBB,
-                              Result, Visited);
+  // This is the set of blocks we've inspected, and the pointer we consider in
+  // each block.  Because of critical edges, we currently bail out if querying
+  // a block with multiple different pointers.  This can happen during PHI
+  // translation.
+  DenseMap<BasicBlock*, Value*> Visited;
+  if (!getNonLocalPointerDepFromBB(Pointer, PointeeSize, isLoad, FromBB,
+                                   Result, Visited, true))
+    return;
+  Result.push_back(std::make_pair(FromBB,
+                                  MemDepResult::getClobber(FromBB->begin())));
 }
 
 /// GetNonLocalInfoForBlock - Compute the memdep value for BB with
@@ -566,35 +571,51 @@
 }
 
 
-/// getNonLocalPointerDepFromBB - 
-void MemoryDependenceAnalysis::
+/// getNonLocalPointerDepFromBB - Perform a dependency query based on
+/// pointer/pointeesize starting at the end of StartBB.  Add any clobber/def
+/// results to the results vector and keep track of which blocks are visited in
+/// 'Visited'.
+///
+/// This has special behavior for the first block queries (when SkipFirstBlock
+/// is true).  In this special case, it ignores the contents of the specified
+/// block and starts returning dependence info for its predecessors.
+///
+/// This function returns false on success, or true to indicate that it could
+/// not compute dependence information for some reason.  This should be treated
+/// as a clobber dependence on the first instruction in the predecessor block.
+bool MemoryDependenceAnalysis::
 getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
                             bool isLoad, BasicBlock *StartBB,
                             SmallVectorImpl<NonLocalDepEntry> &Result,
-                            SmallPtrSet<BasicBlock*, 64> &Visited) {
+                            DenseMap<BasicBlock*, Value*> &Visited,
+                            bool SkipFirstBlock) {
+  
   // Look up the cached info for Pointer.
   ValueIsLoadPair CacheKey(Pointer, isLoad);
   
-  std::pair<BasicBlock*, NonLocalDepInfo> &CacheInfo =
-    NonLocalPointerDeps[CacheKey];
-  NonLocalDepInfo *Cache = &CacheInfo.second;
+  std::pair<BBSkipFirstBlockPair, NonLocalDepInfo> *CacheInfo =
+    &NonLocalPointerDeps[CacheKey];
+  NonLocalDepInfo *Cache = &CacheInfo->second;
 
   // If we have valid cached information for exactly the block we are
   // investigating, just return it with no recomputation.
-  if (CacheInfo.first == StartBB) {
+  if (CacheInfo->first == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
     for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
          I != E; ++I)
       if (!I->second.isNonLocal())
         Result.push_back(*I);
     ++NumCacheCompleteNonLocalPtr;
-    return;
+    return false;
   }
   
   // Otherwise, either this is a new block, a block with an invalid cache
   // pointer or one that we're about to invalidate by putting more info into it
   // than its valid cache info.  If empty, the result will be valid cache info,
   // otherwise it isn't.
-  CacheInfo.first = Cache->empty() ? StartBB : 0;
+  if (Cache->empty())
+    CacheInfo->first = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
+  else
+    CacheInfo->first = BBSkipFirstBlockPair();
   
   SmallVector<BasicBlock*, 32> Worklist;
   Worklist.push_back(StartBB);
@@ -606,23 +627,14 @@
   // revisit blocks after we insert info for them.
   unsigned NumSortedEntries = Cache->size();
   
-  // SkipFirstBlock - If this is the very first block that we're processing, we
-  // don't want to scan or think about its body, because the client was supposed
-  // to do a local dependence query.  Instead, just start processing it by
-  // adding its predecessors to the worklist and iterating.
-  bool SkipFirstBlock = Visited.empty();
-  
   while (!Worklist.empty()) {
     BasicBlock *BB = Worklist.pop_back_val();
     
     // Skip the first block if we have it.
-    if (SkipFirstBlock) {
-      SkipFirstBlock = false;
-    } else {
+    if (!SkipFirstBlock) {
       // Analyze the dependency of *Pointer in FromBB.  See if we already have
       // been here.
-      if (!Visited.insert(BB))
-        continue;
+      assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
 
       // Get the dependency info for Pointer in BB.  If we have cached
       // information, we will use it, otherwise we compute it.
@@ -636,11 +648,123 @@
       }
     }
     
-    // Otherwise, we have to process all the predecessors of this block to scan
-    // them as well.
-    for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
-      // TODO: PHI TRANSLATE.
-      Worklist.push_back(*PI);
+    // If 'Pointer' is an instruction defined in this block, then we need to do
+    // phi translation to change it into a value live in the predecessor block.
+    // If phi translation fails, then we can't continue dependence analysis.
+    Instruction *PtrInst = dyn_cast<Instruction>(Pointer);
+    bool NeedsPHITranslation = PtrInst && PtrInst->getParent() == BB;
+    
+    // If no PHI translation is needed, just add all the predecessors of this
+    // block to scan them as well.
+    if (!NeedsPHITranslation) {
+      SkipFirstBlock = false;
+      for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
+        // Verify that we haven't looked at this block yet.
+        std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
+          InsertRes = Visited.insert(std::make_pair(*PI, Pointer));
+        if (InsertRes.second) {
+          // First time we've looked at *PI.
+          Worklist.push_back(*PI);
+          continue;
+        }
+        
+        // If we have seen this block before, but it was with a different
+        // pointer then we have a phi translation failure and we have to treat
+        // this as a clobber.
+        if (InsertRes.first->second != Pointer)
+          goto PredTranslationFailure;
+      }
+      continue;
+    }
+    
+    // If we do need to do phi translation, then there are a bunch of different
+    // cases, because we have to find a Value* live in the predecessor block. We
+    // know that PtrInst is defined in this block at least.
+    
+    // If this is directly a PHI node, just use the incoming values for each
+    // pred as the phi translated version.
+    if (PHINode *PtrPHI = dyn_cast<PHINode>(PtrInst)) {
+      for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI){
+        BasicBlock *Pred = *PI;
+        Value *PredPtr = PtrPHI->getIncomingValueForBlock(Pred);
+        
+        // Check to see if we have already visited this pred block with another
+        // pointer.  If so, we can't do this lookup.  This failure can occur
+        // with PHI translation when a critical edge exists and the PHI node in
+        // the successor translates to a pointer value different than the
+        // pointer the block was first analyzed with.
+        std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
+          InsertRes = Visited.insert(std::make_pair(Pred, PredPtr));
+
+        if (!InsertRes.second) {
+          // If the predecessor was visited with PredPtr, then we already did
+          // the analysis and can ignore it.
+          if (InsertRes.first->second == PredPtr)
+            continue;
+          
+          // Otherwise, the block was previously analyzed with a different
+          // pointer.  We can't represent the result of this case, so we just
+          // treat this as a phi translation failure.
+          goto PredTranslationFailure;
+        }
+        
+        // If we have a problem phi translating, fall through to the code below
+        // to handle the failure condition.
+        if (getNonLocalPointerDepFromBB(PredPtr, PointeeSize, isLoad, Pred,
+                                        Result, Visited))
+          goto PredTranslationFailure;
+      }
+      
+      // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
+      CacheInfo = &NonLocalPointerDeps[CacheKey];
+      Cache = &CacheInfo->second;
+      
+      // Since we did phi translation, the "Cache" set won't contain all of the
+      // results for the query.  This is ok (we can still use it to accelerate
+      // specific block queries) but we can't do the fastpath "return all
+      // results from the set"  Clear out the indicator for this.
+      CacheInfo->first = BBSkipFirstBlockPair();
+      SkipFirstBlock = false;
+      continue;
+    }
+    
+    // TODO: BITCAST, GEP.
+    
+    //   cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
+    //   if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
+    //     cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
+  PredTranslationFailure:
+    
+    // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
+    CacheInfo = &NonLocalPointerDeps[CacheKey];
+    Cache = &CacheInfo->second;
+    
+    // Since we did phi translation, the "Cache" set won't contain all of the
+    // results for the query.  This is ok (we can still use it to accelerate
+    // specific block queries) but we can't do the fastpath "return all
+    // results from the set"  Clear out the indicator for this.
+    CacheInfo->first = BBSkipFirstBlockPair();
+    
+    // If *nothing* works, mark the pointer as being clobbered by the first
+    // instruction in this block.
+    //
+    // If this is the magic first block, return this as a clobber of the whole
+    // incoming value.  Since we can't phi translate to one of the predecessors,
+    // we have to bail out.
+    if (SkipFirstBlock)
+      return true;
+    
+    for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
+      assert(I != Cache->rend() && "Didn't find current block??");
+      if (I->first != BB)
+        continue;
+      
+      assert(I->second.isNonLocal() &&
+             "Should only be here with transparent block");
+      I->second = MemDepResult::getClobber(BB->begin());
+      ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey.getOpaqueValue());
+      Result.push_back(*I);
+      break;
     }
   }
   
@@ -658,19 +782,22 @@
     Cache->insert(Entry, Val);
     // FALL THROUGH.
   }
-  case 1: {
+  case 1:
     // One new entry, Just insert the new value at the appropriate position.
-    NonLocalDepEntry Val = Cache->back();
-    Cache->pop_back();
-    NonLocalDepInfo::iterator Entry =
-      std::upper_bound(Cache->begin(), Cache->end(), Val);
-    Cache->insert(Entry, Val);
+    if (Cache->size() != 1) {
+      NonLocalDepEntry Val = Cache->back();
+      Cache->pop_back();
+      NonLocalDepInfo::iterator Entry =
+        std::upper_bound(Cache->begin(), Cache->end(), Val);
+      Cache->insert(Entry, Val);
+    }
     break;
-  }
   default:
     // Added many values, do a full scale sort.
     std::sort(Cache->begin(), Cache->end());
   }
+  
+  return false;
 }
 
 /// RemoveCachedNonLocalPointerDependencies - If P exists in
@@ -851,7 +978,7 @@
       NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].second;
       
       // The cache is not valid for any specific block anymore.
-      NonLocalPointerDeps[P].first = 0;
+      NonLocalPointerDeps[P].first = BBSkipFirstBlockPair();
       
       // Update any entries for RemInst to use the instruction after it.
       for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();

Modified: llvm/trunk/test/Transforms/GVN/rle-must-alias.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/rle-must-alias.ll?rev=61022&r1=61021&r2=61022&view=diff

==============================================================================
--- llvm/trunk/test/Transforms/GVN/rle-must-alias.ll (original)
+++ llvm/trunk/test/Transforms/GVN/rle-must-alias.ll Sun Dec 14 21:35:32 2008
@@ -1,4 +1,9 @@
 ; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep {DEAD.rle = phi i32}
+; XFAIL: *
+
+; FIXME: GVN should eliminate the fully redundant %9 GEP which 
+; allows DEAD to be removed.  This is PR3198.
+
 ; The %7 and %4 loads combine to make %DEAD unneeded.
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin7"

Modified: llvm/trunk/test/Transforms/GVN/rle-no-phi-translate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/rle-no-phi-translate.ll?rev=61022&r1=61021&r2=61022&view=diff

==============================================================================
--- llvm/trunk/test/Transforms/GVN/rle-no-phi-translate.ll (original)
+++ llvm/trunk/test/Transforms/GVN/rle-no-phi-translate.ll Sun Dec 14 21:35:32 2008
@@ -1,4 +1,7 @@
 ; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep load
+; FIXME: This should be promotable, but memdep/gvn don't track values
+; path/edge sensitively enough.
+
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin7"
 

Added: llvm/trunk/test/Transforms/GVN/rle-phi-translate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/rle-phi-translate.ll?rev=61022&view=auto

==============================================================================
--- llvm/trunk/test/Transforms/GVN/rle-phi-translate.ll (added)
+++ llvm/trunk/test/Transforms/GVN/rle-phi-translate.ll Sun Dec 14 21:35:32 2008
@@ -0,0 +1,32 @@
+; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep {%cv.rle = phi i32}
+; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep {%bv.rle = phi i32}
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+
+define i32 @g(i32* %b, i32* %c) nounwind {
+entry:
+	%g = alloca i32		; <i32*> [#uses=4]
+	%t1 = icmp eq i32* %b, null		; <i1> [#uses=1]
+	br i1 %t1, label %bb, label %bb1
+
+bb:		; preds = %entry
+	%t2 = load i32* %c, align 4		; <i32> [#uses=1]
+	%t3 = add i32 %t2, 1		; <i32> [#uses=1]
+	store i32 %t3, i32* %g, align 4
+	br label %bb2
+
+bb1:		; preds = %entry
+	%t5 = load i32* %b, align 4		; <i32> [#uses=1]
+	%t6 = add i32 %t5, 1		; <i32> [#uses=1]
+	store i32 %t6, i32* %g, align 4
+	br label %bb2
+
+bb2:		; preds = %bb1, %bb
+	%c_addr.0 = phi i32* [ %g, %bb1 ], [ %c, %bb ]		; <i32*> [#uses=1]
+	%b_addr.0 = phi i32* [ %b, %bb1 ], [ %g, %bb ]		; <i32*> [#uses=1]
+	%cv = load i32* %c_addr.0, align 4		; <i32> [#uses=1]
+	%bv = load i32* %b_addr.0, align 4		; <i32> [#uses=1]
+	%ret = add i32 %cv, %bv		; <i32> [#uses=1]
+	ret i32 %ret
+}
+





More information about the llvm-commits mailing list