[llvm-commits] [llvm] r122806 - in /llvm/trunk: lib/Transforms/Scalar/LoopIdiomRecognize.cpp test/Transforms/LoopIdiom/basic.ll

Chris Lattner sabre at nondot.org
Mon Jan 3 23:46:34 PST 2011


Author: lattner
Date: Tue Jan  4 01:46:33 2011
New Revision: 122806

URL: http://llvm.org/viewvc/llvm-project?rev=122806&view=rev
Log:
Teach loop-idiom to turn a loop containing a memset into a larger memset
when safe.

The testcase is basically this nested loop:
void foo(char *X) {
  for (int i = 0; i != 100; ++i) 
    for (int j = 0; j != 100; ++j)
      X[j+i*100] = 0;
}

which gets turned into a single memset now.  clang -O3 doesn't optimize
this yet though due to a phase ordering issue I haven't analyzed yet.


Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
    llvm/trunk/test/Transforms/LoopIdiom/basic.ll

Modified: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=122806&r1=122805&r2=122806&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp Tue Jan  4 01:46:33 2011
@@ -39,6 +39,7 @@
 
 #define DEBUG_TYPE "loop-idiom"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -72,9 +73,11 @@
                         SmallVectorImpl<BasicBlock*> &ExitBlocks);
 
     bool processLoopStore(StoreInst *SI, const SCEV *BECount);
+    bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
     
-    bool processLoopStoreOfSplatValue(StoreInst *SI, unsigned StoreSize,
-                                      Value *SplatValue,
+    bool processLoopStoreOfSplatValue(Value *DestPtr, unsigned StoreSize,
+                                      unsigned StoreAlignment,
+                                      Value *SplatValue, Instruction *TheStore,
                                       const SCEVAddRecExpr *Ev,
                                       const SCEV *BECount);
     bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
@@ -210,27 +213,39 @@
     Instruction *Inst = I++;
     // Look for store instructions, which may be optimized to memset/memcpy.
     if (StoreInst *SI = dyn_cast<StoreInst>(Inst))  {
-      if (SI->isVolatile()) continue;
-    
       WeakVH InstPtr(I);
       if (!processLoopStore(SI, BECount)) continue;
       MadeChange = true;
       
       // If processing the store invalidated our iterator, start over from the
-      // head of the loop.
+      // top of the block.
       if (InstPtr == 0)
         I = BB->begin();
       continue;
     }
     
+    // Look for memset instructions, which may be optimized to a larger memset.
+    if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst))  {
+      WeakVH InstPtr(I);
+      if (!processLoopMemSet(MSI, BECount)) continue;
+      MadeChange = true;
+      
+      // If processing the memset invalidated our iterator, start over from the
+      // top of the block.
+      if (InstPtr == 0)
+        I = BB->begin();
+      continue;
+    }
   }
   
   return MadeChange;
 }
 
 
-/// scanBlock - Look over a block to see if we can promote anything out of it.
+/// processLoopStore - See if this store can be promoted to a memset or memcpy.
 bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
+  if (SI->isVolatile()) return false;
+
   Value *StoredVal = SI->getValueOperand();
   Value *StorePtr = SI->getPointerOperand();
   
@@ -261,8 +276,8 @@
   // turned into a memset of i8 -1, assuming that all the consequtive bytes
   // are stored.  A store of i32 0x01020304 can never be turned into a memset.
   if (Value *SplatValue = isBytewiseValue(StoredVal))
-    if (processLoopStoreOfSplatValue(SI, StoreSize, SplatValue, StoreEv,
-                                     BECount))
+    if (processLoopStoreOfSplatValue(StorePtr, StoreSize, SI->getAlignment(),
+                                     SplatValue, SI, StoreEv, BECount))
       return true;
 
   // If the stored value is a strided load in the same loop with the same stride
@@ -281,13 +296,48 @@
   return false;
 }
 
+/// processLoopMemSet - See if this memset can be promoted to a large memset.
+bool LoopIdiomRecognize::
+processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
+  // We can only handle non-volatile memsets with a constant size.
+  if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false;
+
+  Value *Pointer = MSI->getDest();
+  
+  // See if the pointer expression is an AddRec like {base,+,1} on the current
+  // loop, which indicates a strided store.  If we have something else, it's a
+  // random store we can't handle.
+  const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
+  if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
+    return false;
+
+  // Reject memsets that are so large that they overflow an unsigned.
+  uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+  if ((SizeInBytes >> 32) != 0)
+    return false;
+  
+  // Check to see if the stride matches the size of the memset.  If so, then we
+  // know that every byte is touched in the loop.
+  const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
+  
+  // TODO: Could also handle negative stride here someday, that will require the
+  // validity check in mayLoopAccessLocation to be updated though.
+  if (Stride == 0 || MSI->getLength() != Stride->getValue())
+    return false;
+  
+  return processLoopStoreOfSplatValue(Pointer, (unsigned)SizeInBytes,
+                                      MSI->getAlignment(), MSI->getValue(),
+                                      MSI, Ev, BECount);
+}
+
+
 /// mayLoopAccessLocation - Return true if the specified loop might access the
 /// specified pointer location, which is a loop-strided access.  The 'Access'
 /// argument specifies what the verboten forms of access are (read or write).
 static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
                                   Loop *L, const SCEV *BECount,
                                   unsigned StoreSize, AliasAnalysis &AA,
-                                  StoreInst *IgnoredStore) {
+                                  Instruction *IgnoredStore) {
   // Get the location that may be stored across the loop.  Since the access is
   // strided positively through memory, we say that the modified location starts
   // at the pointer and has infinite size.
@@ -317,8 +367,9 @@
 /// processLoopStoreOfSplatValue - We see a strided store of a memsetable value.
 /// If we can transform this into a memset in the loop preheader, do so.
 bool LoopIdiomRecognize::
-processLoopStoreOfSplatValue(StoreInst *SI, unsigned StoreSize,
-                             Value *SplatValue,
+processLoopStoreOfSplatValue(Value *DestPtr, unsigned StoreSize, 
+                             unsigned StoreAlignment, Value *SplatValue, 
+                             Instruction *TheStore,
                              const SCEVAddRecExpr *Ev, const SCEV *BECount) {
   // Verify that the stored value is loop invariant.  If not, we can't promote
   // the memset.
@@ -329,9 +380,9 @@
   // this into a memset in the loop preheader now if we want.  However, this
   // would be unsafe to do if there is anything else in the loop that may read
   // or write to the aliased location.  Check for an alias.
-  if (mayLoopAccessLocation(SI->getPointerOperand(), AliasAnalysis::ModRef,
+  if (mayLoopAccessLocation(DestPtr, AliasAnalysis::ModRef,
                             CurLoop, BECount,
-                            StoreSize, getAnalysis<AliasAnalysis>(), SI))
+                            StoreSize, getAnalysis<AliasAnalysis>(), TheStore))
     return false;
   
   // Okay, everything looks good, insert the memset.
@@ -344,14 +395,14 @@
   // header.  Just insert code for it in the preheader.
   SCEVExpander Expander(*SE);
   
-  unsigned AddrSpace = SI->getPointerAddressSpace();
+  unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
   Value *BasePtr = 
     Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
                            Preheader->getTerminator());
   
   // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
   // pointer size if it isn't already.
-  const Type *IntPtr = TD->getIntPtrType(SI->getContext());
+  const Type *IntPtr = TD->getIntPtrType(SplatValue->getContext());
   BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
   
   const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
@@ -364,15 +415,15 @@
     Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
   
   Value *NewCall =
-    Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, SI->getAlignment());
+    Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment);
   
   DEBUG(dbgs() << "  Formed memset: " << *NewCall << "\n"
-               << "    from store to: " << *Ev << " at: " << *SI << "\n");
+               << "    from store to: " << *Ev << " at: " << *TheStore << "\n");
   (void)NewCall;
   
   // Okay, the memset has been formed.  Zap the original store and anything that
   // feeds into it.
-  DeleteDeadInstruction(SI, *SE);
+  DeleteDeadInstruction(TheStore, *SE);
   ++NumMemSet;
   return true;
 }

Modified: llvm/trunk/test/Transforms/LoopIdiom/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/basic.ll?rev=122806&r1=122805&r2=122806&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopIdiom/basic.ll (original)
+++ llvm/trunk/test/Transforms/LoopIdiom/basic.ll Tue Jan  4 01:46:33 2011
@@ -240,3 +240,36 @@
 ; CHECK: ret void
 }
 
+; Two dimensional nested loop should be promoted to one big memset.
+define void @test10(i8* %X) nounwind ssp {
+entry:
+  br label %bb.nph
+
+bb.nph:                                           ; preds = %entry, %for.inc10
+  %i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ]
+  br label %for.body5
+
+for.body5:                                        ; preds = %for.body5, %bb.nph
+  %j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ]
+  %mul = mul nsw i32 %i.04, 100
+  %add = add nsw i32 %j.02, %mul
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds i8* %X, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %inc = add nsw i32 %j.02, 1
+  %cmp4 = icmp eq i32 %inc, 100
+  br i1 %cmp4, label %for.inc10, label %for.body5
+
+for.inc10:                                        ; preds = %for.body5
+  %inc12 = add nsw i32 %i.04, 1
+  %cmp = icmp eq i32 %inc12, 100
+  br i1 %cmp, label %for.end13, label %bb.nph
+
+for.end13:                                        ; preds = %for.inc10
+  ret void
+; CHECK: @test10
+; CHECK: entry:
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %X, i8 0, i64 10000, i32 1, i1 false)
+; CHECK-NOT: store
+; CHECK: ret void
+}





More information about the llvm-commits mailing list