[llvm-commits] [llvm] r130390 - in /llvm/trunk: include/llvm/Analysis/MemoryDependenceAnalysis.h lib/Analysis/MemoryDependenceAnalysis.cpp lib/Transforms/Scalar/GVN.cpp test/Transforms/GVN/rle.ll

Chris Lattner sabre at nondot.org
Thu Apr 28 00:29:08 PDT 2011


Author: lattner
Date: Thu Apr 28 02:29:08 2011
New Revision: 130390

URL: http://llvm.org/viewvc/llvm-project?rev=130390&view=rev
Log:
teach GVN to widen integer loads when they are overaligned, when doing an 
wider load would allow elimination of subsequent loads, and when the wider
load is still a native integer type.  This eliminates a ton of loads on 
various benchmarks involving struct fields, though it is somewhat hobbled
by clang not being very aggressive about field alignment.

This is yet another step along the way towards resolving PR6627.


Modified:
    llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h
    llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp
    llvm/trunk/lib/Transforms/Scalar/GVN.cpp
    llvm/trunk/test/Transforms/GVN/rle.ll

Modified: llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h?rev=130390&r1=130389&r2=130390&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h (original)
+++ llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h Thu Apr 28 02:29:08 2011
@@ -355,6 +355,20 @@
                                           BasicBlock::iterator ScanIt,
                                           BasicBlock *BB);
     
+    
+    /// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that
+    /// looks at a memory location for a load (specified by MemLocBase, Offs,
+    /// and Size) and compares it against a load.  If the specified load could
+    /// be safely widened to a larger integer load that is 1) still efficient,
+    /// 2) safe for the target, and 3) would provide the specified memory
+    /// location value, then this function returns the size in bytes of the
+    /// load width to use.  If not, this returns zero.
+    static unsigned getLoadLoadClobberFullWidthSize(const Value *MemLocBase,
+                                                    int64_t MemLocOffs,
+                                                    unsigned MemLocSize,
+                                                    const LoadInst *LI,
+                                                    const TargetData &TD);
+    
   private:
     MemDepResult getCallSiteDependencyFrom(CallSite C, bool isReadOnlyCall,
                                            BasicBlock::iterator ScanIt,

Modified: llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp?rev=130390&r1=130389&r2=130390&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp (original)
+++ llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp Thu Apr 28 02:29:08 2011
@@ -231,7 +231,8 @@
 isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
                                        const Value *&MemLocBase,
                                        int64_t &MemLocOffs,
-                                       const LoadInst *LI, TargetData *TD) {
+                                       const LoadInst *LI,
+                                       const TargetData *TD) {
   // If we have no target data, we can't do this.
   if (TD == 0) return false;
 
@@ -239,14 +240,34 @@
   if (MemLocBase == 0)
     MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, *TD);
 
+  unsigned Size = MemoryDependenceAnalysis::
+    getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size,
+                                    LI, *TD);
+  return Size != 0;
+}
+
+/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that
+/// looks at a memory location for a load (specified by MemLocBase, Offs,
+/// and Size) and compares it against a load.  If the specified load could
+/// be safely widened to a larger integer load that is 1) still efficient,
+/// 2) safe for the target, and 3) would provide the specified memory
+/// location value, then this function returns the size in bytes of the
+/// load width to use.  If not, this returns zero.
+unsigned MemoryDependenceAnalysis::
+getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
+                                unsigned MemLocSize, const LoadInst *LI,
+                                const TargetData &TD) {
+  // We can only extend non-volatile integer loads.
+  if (!isa<IntegerType>(LI->getType()) || LI->isVolatile()) return 0;
+  
   // Get the base of this load.
   int64_t LIOffs = 0;
   const Value *LIBase = 
-    GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, *TD);
+    GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, TD);
   
   // If the two pointers are not based on the same pointer, we can't tell that
   // they are related.
-  if (LIBase != MemLocBase) return false;
+  if (LIBase != MemLocBase) return 0;
   
   // Okay, the two values are based on the same pointer, but returned as
   // no-alias.  This happens when we have things like two byte loads at "P+1"
@@ -255,7 +276,7 @@
   // the bits required by MemLoc.
   
   // If MemLoc is before LI, then no widening of LI will help us out.
-  if (MemLocOffs < LIOffs) return false;
+  if (MemLocOffs < LIOffs) return 0;
   
   // Get the alignment of the load in bytes.  We assume that it is safe to load
   // any legal integer up to this size without a problem.  For example, if we're
@@ -264,10 +285,10 @@
   // to i16.
   unsigned LoadAlign = LI->getAlignment();
 
-  int64_t MemLocEnd = MemLocOffs+MemLoc.Size;
+  int64_t MemLocEnd = MemLocOffs+MemLocSize;
   
   // If no amount of rounding up will let MemLoc fit into LI, then bail out.
-  if (LIOffs+LoadAlign < MemLocEnd) return false;
+  if (LIOffs+LoadAlign < MemLocEnd) return 0;
   
   // This is the size of the load to try.  Start with the next larger power of
   // two.
@@ -278,17 +299,17 @@
     // If this load size is bigger than our known alignment or would not fit
     // into a native integer register, then we fail.
     if (NewLoadByteSize > LoadAlign ||
-        !TD->fitsInLegalInteger(NewLoadByteSize*8))
-      return false;
+        !TD.fitsInLegalInteger(NewLoadByteSize*8))
+      return 0;
 
     // If a load of this width would include all of MemLoc, then we succeed.
     if (LIOffs+NewLoadByteSize >= MemLocEnd)
-      return true;
+      return NewLoadByteSize;
     
     NewLoadByteSize <<= 1;
   }
   
-  return false;
+  return 0;
 }
 
 /// getPointerDependencyFrom - Return the instruction on which a memory

Modified: llvm/trunk/lib/Transforms/Scalar/GVN.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/GVN.cpp?rev=130390&r1=130389&r2=130390&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/GVN.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/GVN.cpp Thu Apr 28 02:29:08 2011
@@ -629,10 +629,11 @@
   if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
     return 0;
   
+  // If this is already the right type, just return it.
   const Type *StoredValTy = StoredVal->getType();
   
   uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
-  uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
+  uint64_t LoadSize = TD.getTypeStoreSizeInBits(LoadedTy);
   
   // If the store and reload are the same size, we can always reuse it.
   if (StoreSize == LoadSize) {
@@ -806,7 +807,21 @@
   
   Value *DepPtr = DepLI->getPointerOperand();
   uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType());
-  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD);
+  int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD);
+  if (R != -1) return R;
+  
+  // If we have a load/load clobber an DepLI can be widened to cover this load,
+  // then we should widen it!
+  int64_t LoadOffs = 0;
+  const Value *LoadBase =
+    GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD);
+  unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
+  
+  unsigned Size = MemoryDependenceAnalysis::
+    getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD);
+  if (Size == 0) return -1;
+  
+  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD);
 }
 
 
@@ -858,9 +873,9 @@
 
 /// GetStoreValueForLoad - This function is called when we have a
 /// memdep query of a load that ends up being a clobbering store.  This means
-/// that the store *may* provide bits used by the load but we can't be sure
-/// because the pointers don't mustalias.  Check this case to see if there is
-/// anything more we can do before we give up.
+/// that the store provides bits used by the load but we the pointers don't
+/// mustalias.  Check this case to see if there is anything more we can do
+/// before we give up.
 static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
                                    const Type *LoadTy,
                                    Instruction *InsertPt, const TargetData &TD){
@@ -896,6 +911,60 @@
   return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
 }
 
+/// GetStoreValueForLoad - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering load.  This means
+/// that the load *may* provide bits used by the load but we can't be sure
+/// because the pointers don't mustalias.  Check this case to see if there is
+/// anything more we can do before we give up.
+static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
+                                  const Type *LoadTy,
+                                  Instruction *InsertPt, const TargetData &TD,
+                                  MemoryDependenceAnalysis &MD) {
+  // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
+  // widen SrcVal out to a larger load.
+  unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
+  unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
+  if (Offset+LoadSize > SrcValSize) {
+    assert(!SrcVal->isVolatile() && "Cannot widen volatile load!");
+    assert(isa<IntegerType>(SrcVal->getType())&&"Can't widen non-integer load");
+    // If we have a load/load clobber an DepLI can be widened to cover this
+    // load, then we should widen it to the next power of 2 size big enough!
+    unsigned NewLoadSize = Offset+LoadSize;
+    if (!isPowerOf2_32(NewLoadSize))
+      NewLoadSize = NextPowerOf2(NewLoadSize);
+
+    Value *PtrVal = SrcVal->getPointerOperand();
+    
+    IRBuilder<> Builder(SrcVal->getParent(), SrcVal);
+    const Type *DestPTy = 
+      IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
+    DestPTy = PointerType::get(DestPTy, 
+                       cast<PointerType>(PtrVal->getType())->getAddressSpace());
+    
+    PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
+    LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
+    NewLoad->takeName(SrcVal);
+    NewLoad->setAlignment(SrcVal->getAlignment());
+    
+    DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
+    DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
+    
+    // Replace uses of the original load with the wider load.  On a big endian
+    // system, we need to shift down to get the relevant bits.
+    Value *RV = NewLoad;
+    if (TD.isBigEndian())
+      RV = Builder.CreateLShr(RV,
+                    NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits());
+    RV = Builder.CreateTrunc(RV, SrcVal->getType());
+    SrcVal->replaceAllUsesWith(RV);
+    MD.removeInstruction(SrcVal);
+    SrcVal = NewLoad;
+  }
+  
+  return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD);
+}
+
+
 /// GetMemInstValueForLoad - This function is called when we have a
 /// memdep query of a load that ends up being a clobbering mem intrinsic.
 static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
@@ -958,11 +1027,12 @@
   BasicBlock *BB;
   enum ValType {
     SimpleVal,  // A simple offsetted value that is accessed.
+    LoadVal,    // A value produced by a load.
     MemIntrin   // A memory intrinsic which is loaded from.
   };
   
   /// V - The value that is live out of the block.
-  PointerIntPair<Value *, 1, ValType> Val;
+  PointerIntPair<Value *, 2, ValType> Val;
   
   /// Offset - The byte offset in Val that is interesting for the load query.
   unsigned Offset;
@@ -987,21 +1057,40 @@
     return Res;
   }
   
+  static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
+                                       unsigned Offset = 0) {
+    AvailableValueInBlock Res;
+    Res.BB = BB;
+    Res.Val.setPointer(LI);
+    Res.Val.setInt(LoadVal);
+    Res.Offset = Offset;
+    return Res;
+  }
+
   bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+  bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
+  bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+
   Value *getSimpleValue() const {
     assert(isSimpleValue() && "Wrong accessor");
     return Val.getPointer();
   }
   
+  LoadInst *getCoercedLoadValue() const {
+    assert(isCoercedLoadValue() && "Wrong accessor");
+    return cast<LoadInst>(Val.getPointer());
+  }
+  
   MemIntrinsic *getMemIntrinValue() const {
-    assert(!isSimpleValue() && "Wrong accessor");
+    assert(isMemIntrinValue() && "Wrong accessor");
     return cast<MemIntrinsic>(Val.getPointer());
   }
   
   /// MaterializeAdjustedValue - Emit code into this block to adjust the value
   /// defined here to the specified type.  This handles various coercion cases.
   Value *MaterializeAdjustedValue(const Type *LoadTy,
-                                  const TargetData *TD) const {
+                                  const TargetData *TD,
+                                  MemoryDependenceAnalysis &MD) const {
     Value *Res;
     if (isSimpleValue()) {
       Res = getSimpleValue();
@@ -1010,14 +1099,27 @@
         Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
                                    *TD);
         
-        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
+        DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
                      << *getSimpleValue() << '\n'
                      << *Res << '\n' << "\n\n\n");
       }
+    } else if (isCoercedLoadValue()) {
+      LoadInst *Load = getCoercedLoadValue();
+      if (Load->getType() == LoadTy && Offset == 0) {
+        Res = Load;
+      } else {
+        assert(TD && "Need target data to handle type mismatch case");
+        Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
+                                  *TD, MD);
+        
+        DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << "  "
+                     << *getCoercedLoadValue() << '\n'
+                     << *Res << '\n' << "\n\n\n");
+      }
     } else {
       Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
                                    LoadTy, BB->getTerminator(), *TD);
-      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+      DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
                    << "  " << *getMemIntrinValue() << '\n'
                    << *Res << '\n' << "\n\n\n");
     }
@@ -1025,7 +1127,7 @@
   }
 };
 
-}
+} // end anonymous namespace
 
 /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
 /// construct SSA form, allowing us to eliminate LI.  This returns the value
@@ -1034,12 +1136,13 @@
                          SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
                                      const TargetData *TD,
                                      const DominatorTree &DT,
-                                     AliasAnalysis *AA) {
+                                     AliasAnalysis *AA,
+                                     MemoryDependenceAnalysis &MD) {
   // Check for the fully redundant, dominating load case.  In this case, we can
   // just use the dominating value directly.
   if (ValuesPerBlock.size() == 1 && 
       DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
-    return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);
+    return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD, MD);
 
   // Otherwise, we have to construct SSA form.
   SmallVector<PHINode*, 8> NewPHIs;
@@ -1055,7 +1158,7 @@
     if (SSAUpdate.HasValueForBlock(BB))
       continue;
 
-    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
+    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD,MD));
   }
   
   // Perform PHI construction.
@@ -1159,8 +1262,8 @@
                                                      DepLI, *TD);
           
           if (Offset != -1) {
-            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, DepLI,
-                                                                Offset));
+            ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI,
+                                                                    Offset));
             continue;
           }
         }
@@ -1223,7 +1326,7 @@
           continue;
         }          
       }
-      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
+      ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD));
       continue;
     }
     
@@ -1243,7 +1346,7 @@
     
     // Perform PHI construction.
     Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
-                                      VN.getAliasAnalysis());
+                                      VN.getAliasAnalysis(), *MD);
     LI->replaceAllUsesWith(V);
 
     if (isa<PHINode>(V))
@@ -1466,7 +1569,7 @@
 
   // Perform PHI construction.
   Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
-                                    VN.getAliasAnalysis());
+                                    VN.getAliasAnalysis(), *MD);
   LI->replaceAllUsesWith(V);
   if (isa<PHINode>(V))
     V->takeName(LI);
@@ -1527,7 +1630,7 @@
                                                  L->getPointerOperand(),
                                                  DepLI, *TD);
       if (Offset != -1)
-        AvailVal = GetStoreValueForLoad(DepLI, Offset, L->getType(), L, *TD);
+        AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *TD, *MD);
     }
     
     // If the clobbering value is a memset/memcpy/memmove, see if we can forward

Modified: llvm/trunk/test/Transforms/GVN/rle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/rle.ll?rev=130390&r1=130389&r2=130390&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/GVN/rle.ll (original)
+++ llvm/trunk/test/Transforms/GVN/rle.ll Thu Apr 28 02:29:08 2011
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
 
 ; 32-bit little endian target.
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 
 ;; Trivial RLE test.
 define i32 @test0(i32 %V, i32* %P) {
@@ -593,4 +593,27 @@
 }
 
 
+;;===----------------------------------------------------------------------===;;
+;; Load Widening
+;;===----------------------------------------------------------------------===;;
+
+%widening1 = type { i32, i8, i8 }
+
+ at f = global %widening1 zeroinitializer, align 4
+
+define i32 @test_widening1() nounwind ssp noredzone {
+entry:
+  %tmp = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 1), align 4
+  %conv = zext i8 %tmp to i32
+  %tmp1 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 2), align 1
+  %conv2 = zext i8 %tmp1 to i32
+  %add = add nsw i32 %conv, %conv2
+  ret i32 %add
+; CHECK: @test_widening1
+; CHECK-NOT: load
+; CHECK: load i16*
+; CHECK-NOT: load
+; CHECK-ret i32
+}
+
 





More information about the llvm-commits mailing list