[llvm-commits] [llvm] r122678 - in /llvm/trunk: lib/Transforms/Scalar/LoopIdiomRecognize.cpp test/Transforms/LoopIdiom/basic.ll

Chris Lattner sabre at nondot.org
Sat Jan 1 19:37:56 PST 2011


Author: lattner
Date: Sat Jan  1 21:37:56 2011
New Revision: 122678

URL: http://llvm.org/viewvc/llvm-project?rev=122678&view=rev
Log:
teach loop idiom recognition to form memcpy's from simple loops.

Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
    llvm/trunk/test/Transforms/LoopIdiom/basic.ll

Modified: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=122678&r1=122677&r2=122678&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp Sat Jan  1 21:37:56 2011
@@ -49,7 +49,11 @@
                                       Value *SplatValue,
                                       const SCEVAddRecExpr *Ev,
                                       const SCEV *BECount);
-    
+    bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
+                                    const SCEVAddRecExpr *StoreEv,
+                                    const SCEVAddRecExpr *LoadEv,
+                                    const SCEV *BECount);
+      
     /// This transformation requires natural loop information & requires that
     /// loop preheaders be inserted into the CFG.
     ///
@@ -172,14 +176,15 @@
   // See if the pointer expression is an AddRec like {base,+,1} on the current
   // loop, which indicates a strided store.  If we have something else, it's a
   // random store we can't handle.
-  const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
-  if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
+  const SCEVAddRecExpr *StoreEv =
+    dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+  if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
     return false;
 
   // Check to see if the stride matches the size of the store.  If so, then we
   // know that every byte is touched in the loop.
   unsigned StoreSize = (unsigned)SizeInBits >> 3; 
-  const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
+  const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
   
   // TODO: Could also handle negative stride here someday, that will require the
   // validity check in mayLoopModRefLocation to be updated though.
@@ -190,10 +195,22 @@
   // turned into a memset of i8 -1, assuming that all the consequtive bytes
   // are stored.  A store of i32 0x01020304 can never be turned into a memset.
   if (Value *SplatValue = isBytewiseValue(StoredVal))
-    return processLoopStoreOfSplatValue(SI, StoreSize, SplatValue, Ev, BECount);
-
-  // Handle the memcpy case here.
- // errs() << "Found strided store: " << *Ev << "\n";
+    if (processLoopStoreOfSplatValue(SI, StoreSize, SplatValue, StoreEv,
+                                     BECount))
+      return true;
+
+  // If the stored value is a strided load in the same loop with the same stride
+  // then this may be transformable into a memcpy.  This kicks in for stuff like
+  //   for (i) A[i] = B[i];
+  if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
+    const SCEVAddRecExpr *LoadEv =
+      dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
+    if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
+        StoreEv->getOperand(1) == LoadEv->getOperand(1) && !LI->isVolatile())
+      if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
+        return true;
+  }
+ // errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";
 
   return false;
 }
@@ -201,8 +218,9 @@
 /// mayLoopModRefLocation - Return true if the specified loop might do a load or
 /// store to the same location that the specified store could store to, which is
 /// a loop-strided access. 
-static bool mayLoopModRefLocation(StoreInst *SI, Loop *L, const SCEV *BECount,
-                                  unsigned StoreSize, AliasAnalysis &AA) {
+static bool mayLoopModRefLocation(Value *Ptr, Loop *L, const SCEV *BECount,
+                                  unsigned StoreSize, AliasAnalysis &AA,
+                                  StoreInst *IgnoredStore) {
   // Get the location that may be stored across the loop.  Since the access is
   // strided positively through memory, we say that the modified location starts
   // at the pointer and has infinite size.
@@ -217,12 +235,13 @@
   // operand in the store.  Store to &A[i] of 100 will always return may alias
   // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
   // which will then no-alias a store to &A[100].
-  AliasAnalysis::Location StoreLoc(SI->getPointerOperand(), AccessSize);
+  AliasAnalysis::Location StoreLoc(Ptr, AccessSize);
 
   for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
        ++BI)
     for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
-      if (AA.getModRefInfo(I, StoreLoc) != AliasAnalysis::NoModRef)
+      if (&*I != IgnoredStore &&
+          AA.getModRefInfo(I, StoreLoc) != AliasAnalysis::NoModRef)
         return true;
 
   return false;
@@ -239,21 +258,13 @@
   if (!CurLoop->isLoopInvariant(SplatValue))
     return false;
   
-  // Temporarily remove the store from the loop, to avoid the mod/ref query from
-  // seeing it.
-  Instruction *InstAfterStore = ++BasicBlock::iterator(SI);
-  SI->removeFromParent();
-  
   // Okay, we have a strided store "p[i]" of a splattable value.  We can turn
   // this into a memset in the loop preheader now if we want.  However, this
   // would be unsafe to do if there is anything else in the loop that may read
   // or write to the aliased location.  Check for an alias.
-  bool Unsafe = mayLoopModRefLocation(SI, CurLoop, BECount, StoreSize,
-                                      getAnalysis<AliasAnalysis>());
-
-  SI->insertBefore(InstAfterStore);
-  
-  if (Unsafe) return false;
+  if (mayLoopModRefLocation(SI->getPointerOperand(), CurLoop, BECount,
+                            StoreSize, getAnalysis<AliasAnalysis>(), SI))
+    return false;
   
   // Okay, everything looks good, insert the memset.
   BasicBlock *Preheader = CurLoop->getLoopPreheader();
@@ -301,3 +312,72 @@
   return true;
 }
 
+/// processLoopStoreOfLoopLoad - We see a strided store whose value is a
+/// same-strided load.  Try to turn the loop into a memcpy in the preheader.
+bool LoopIdiomRecognize::
+processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
+                           const SCEVAddRecExpr *StoreEv,
+                           const SCEVAddRecExpr *LoadEv,
+                           const SCEV *BECount) {
+  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
+  
+  // Okay, we have a strided store "p[i]" of a loaded value.  We can turn
+  // this into a memcpy in the loop preheader now if we want.  However, this
+  // would be unsafe to do if there is anything else in the loop that may read
+  // or write to the aliased location (including the load feeding the stores).
+  // Check for an alias; SI itself is excluded from the query.
+  if (mayLoopModRefLocation(SI->getPointerOperand(), CurLoop, BECount,
+                            StoreSize, getAnalysis<AliasAnalysis>(), SI))
+    return false;
+  
+  // Okay, everything looks good, insert the memcpy.
+  BasicBlock *Preheader = CurLoop->getLoopPreheader();
+  
+  IRBuilder<> Builder(Preheader->getTerminator());
+  
+  // The trip count of the loop and the base pointers of the addrec SCEVs are
+  // guaranteed to be loop invariant, which means that they should dominate the
+  // header.  Just insert code for them in the preheader.
+  SCEVExpander Expander(*SE);
+
+  Value *LoadBasePtr = 
+    Expander.expandCodeFor(LoadEv->getStart(),
+                           Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
+                           Preheader->getTerminator());
+  Value *StoreBasePtr = 
+    Expander.expandCodeFor(StoreEv->getStart(),
+                           Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
+                           Preheader->getTerminator());
+  
+  // The # stored bytes is (BECount+1)*Size.  Zero-extend or truncate BECount
+  // to pointer size if it isn't already.
+  const Type *IntPtr = TD->getIntPtrType(SI->getContext());
+  unsigned BESize = SE->getTypeSizeInBits(BECount->getType());
+  if (BESize < TD->getPointerSizeInBits())
+    BECount = SE->getZeroExtendExpr(BECount, IntPtr);
+  else if (BESize > TD->getPointerSizeInBits())
+    BECount = SE->getTruncateExpr(BECount, IntPtr);
+  
+  const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
+                                         true, true /*nooverflow*/);
+  if (StoreSize != 1)
+    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
+                               true, true /*nooverflow*/);
+  
+  Value *NumBytes =
+    Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+  
+  Value *NewCall =
+    Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
+                         std::min(SI->getAlignment(), LI->getAlignment()));
+  
+  DEBUG(dbgs() << "  Formed memcpy: " << *NewCall << "\n"
+               << "    from load ptr=" << *LoadEv << " at: " << *LI << "\n"
+               << "    from store ptr=" << *StoreEv << " at: " << *SI << "\n");
+  (void)NewCall;
+  
+  // Okay, the memcpy has been formed.  Zap the original store and anything that
+  // feeds into it.
+  DeleteDeadInstruction(SI, *SE);
+  return true;
+}

Modified: llvm/trunk/test/Transforms/LoopIdiom/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/basic.ll?rev=122678&r1=122677&r2=122678&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopIdiom/basic.ll (original)
+++ llvm/trunk/test/Transforms/LoopIdiom/basic.ll Sat Jan  1 21:37:56 2011
@@ -114,3 +114,31 @@
 ; CHECK: ret void
 }
 
+
+;; memcpy formation: a byte-copy loop from %Base to %Dest becomes llvm.memcpy.
+define void @test6(i64 %Size) nounwind ssp {
+bb.nph:
+  %Base = alloca i8, i32 10000
+  %Dest = alloca i8, i32 10000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i8* %Base, i64 %indvar
+  %DestI = getelementptr i8* %Dest, i64 %indvar
+  %V = load i8* %I.0.014, align 1
+  store i8 %V, i8* %DestI, align 1
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+; CHECK: @test6
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Dest, i8* %Base, i64 %Size, i32 1, i1 false)
+; CHECK-NOT: store
+; CHECK: ret void
+}
+
+
+





More information about the llvm-commits mailing list