[llvm] r251518 - Reapply: [LIR] Add support for creating memsets from loops with a negative stride.

Chad Rosier via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 28 07:38:50 PDT 2015


Author: mcrosier
Date: Wed Oct 28 09:38:49 2015
New Revision: 251518

URL: http://llvm.org/viewvc/llvm-project?rev=251518&view=rev
Log:
Reapply: [LIR] Add support for creating memsets from loops with a negative stride.

The simple fix is to prevent forming a memcpy from loops with a negative stride.
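
As an editorial illustration (not part of the commit), the kind of source
loop this patch now turns into a memset looks roughly like the new test15
added below; the function name and element type here are invented:

  // Sketch only: a store loop that walks backwards through memory with a
  // stride of -(store size).  Every byte of f[0..65536] is still written
  // exactly once, so the whole loop can become a single memset placed in
  // the loop preheader.
  void zero_backwards(int *f) {
    for (long i = 65536; i >= 0; --i)
      f[i] = 0;
  }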

Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
    llvm/trunk/test/Transforms/LoopIdiom/basic.ll

Modified: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=251518&r1=251517&r2=251518&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp Wed Oct 28 09:38:49 2015
@@ -31,11 +31,6 @@
 //   void foo(_Complex float *P)
 //     for (i) { __real__(*P) = 0;  __imag__(*P) = 0; }
 //
-// We should enhance this to handle negative strides through memory.
-// Alternatively (and perhaps better) we could rely on an earlier pass to force
-// forward iteration through memory, which is generally better for cache
-// behavior.  Negative strides *do* happen for memset/memcpy loops.
-//
 // This could recognize common matrix multiplies and dot product idioms and
 // replace them with calls to BLAS (if linked in??).
 //
@@ -124,7 +119,7 @@ private:
   bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                                unsigned StoreAlignment, Value *SplatValue,
                                Instruction *TheStore, const SCEVAddRecExpr *Ev,
-                               const SCEV *BECount);
+                               const SCEV *BECount, bool NegStride);
   bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
                                   const SCEVAddRecExpr *StoreEv,
                                   const SCEVAddRecExpr *LoadEv,
@@ -316,25 +311,27 @@ bool LoopIdiomRecognize::processLoopStor
   // Check to see if the stride matches the size of the store.  If so, then we
   // know that every byte is touched in the loop.
   unsigned StoreSize = (unsigned)SizeInBits >> 3;
-  const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
 
-  if (!Stride || StoreSize != Stride->getValue()->getValue()) {
-    // TODO: Could also handle negative stride here someday, that will require
-    // the validity check in mayLoopAccessLocation to be updated though.
-    // Enable this to print exact negative strides.
-    if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) {
-      dbgs() << "NEGATIVE STRIDE: " << *SI << "\n";
-      dbgs() << "BB: " << *SI->getParent();
-    }
+  const SCEVConstant *ConstStride =
+      dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
+  if (!ConstStride)
+    return false;
 
+  APInt Stride = ConstStride->getValue()->getValue();
+  if (StoreSize != Stride && StoreSize != -Stride)
     return false;
-  }
+
+  bool NegStride = StoreSize == -Stride;
 
   // See if we can optimize just this store in isolation.
   if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
-                              StoredVal, SI, StoreEv, BECount))
+                              StoredVal, SI, StoreEv, BECount, NegStride))
     return true;
 
+  // TODO: We don't handle negative stride memcpys.
+  if (NegStride)
+    return false;
+
   // If the stored value is a strided load in the same loop with the same stride
   // this may be transformable into a memcpy.  This kicks in for stuff like
   //   for (i) A[i] = B[i];
@@ -387,7 +384,7 @@ bool LoopIdiomRecognize::processLoopMemS
 
   return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
                                  MSI->getAlignment(), MSI->getValue(), MSI, Ev,
-                                 BECount);
+                                 BECount, /*NegStride=*/false);
 }
 
 /// mayLoopAccessLocation - Return true if the specified loop might access the
@@ -468,7 +465,7 @@ static Constant *getMemSetPatternValue(V
 bool LoopIdiomRecognize::processLoopStridedStore(
     Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
     Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev,
-    const SCEV *BECount) {
+    const SCEV *BECount, bool NegStride) {
 
   // If the stored value is a byte-wise value (like i32 -1), then it may be
   // turned into a memset of i8 -1, assuming that all the consecutive bytes
@@ -506,15 +503,27 @@ bool LoopIdiomRecognize::processLoopStri
   SCEVExpander Expander(*SE, DL, "loop-idiom");
 
   Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
+  Type *IntPtr = Builder.getIntPtrTy(DL, DestAS);
+
+  const SCEV *Start = Ev->getStart();
+  // If we have a negative stride, Start refers to the end of the memory
+  // location we're trying to memset.  Therefore, we need to recompute the start
+  // point, which is just Start - BECount*Size.
+  if (NegStride) {
+    const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+    if (StoreSize != 1)
+      Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize),
+                             SCEV::FlagNUW);
+    Start = SE->getMinusSCEV(Ev->getStart(), Index);
+  }
 
   // Okay, we have a strided store "p[i]" of a splattable value.  We can turn
   // this into a memset in the loop preheader now if we want.  However, this
   // would be unsafe to do if there is anything else in the loop that may read
   // or write to the aliased location.  Check for any overlap by generating the
   // base pointer and checking the region.
-  Value *BasePtr = Expander.expandCodeFor(Ev->getStart(), DestInt8PtrTy,
-                                          Preheader->getTerminator());
-
+  Value *BasePtr =
+      Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
   if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize,
                             *AA, TheStore)) {
     Expander.clear();
@@ -527,7 +536,6 @@ bool LoopIdiomRecognize::processLoopStri
 
   // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
   // pointer size if it isn't already.
-  Type *IntPtr = Builder.getIntPtrTy(DL, DestAS);
   BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
 
   const SCEV *NumBytesS =
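
The stride check above now accepts a constant stride of either +StoreSize or
-StoreSize, setting NegStride in the latter case.  As an editorial
illustration (not part of the commit), here is how the Start adjustment and
byte count work out for the new test15 added below:

  // StoreSize      = 4                          (i32 store)
  // BECount        = 65536                      (backedge-taken count)
  // Ev->getStart() = &f[65536]                  (highest address written)
  // Index          = BECount * StoreSize        = 262144
  // Start          = &f[65536] - 262144 bytes   = &f[0]  (lowest address written)
  // NumBytes       = (BECount + 1) * StoreSize  = 262148 (the memset length
  //                                                checked in test15)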

Modified: llvm/trunk/test/Transforms/LoopIdiom/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/basic.ll?rev=251518&r1=251517&r2=251518&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopIdiom/basic.ll (original)
+++ llvm/trunk/test/Transforms/LoopIdiom/basic.ll Wed Oct 28 09:38:49 2015
@@ -424,3 +424,83 @@ exit:
   ret void
 ; CHECK: ret void
 }
+
+; Recognize loops with a negative stride.
+define void @test15(i32* nocapture %f) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
+  store i32 0, i32* %arrayidx, align 4
+  %indvars.iv.next = add nsw i64 %indvars.iv, -1
+  %cmp = icmp sgt i64 %indvars.iv, 0
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+; CHECK-LABEL: @test15(
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %f1, i8 0, i64 262148, i32 4, i1 false)
+; CHECK-NOT: store
+; CHECK: ret void
+}
+
+; Loop with a negative stride.  Verify an aliasing write to f[65536] prevents
+; the creation of a memset.
+define void @test16(i32* nocapture %f) {
+entry:
+  %arrayidx1 = getelementptr inbounds i32, i32* %f, i64 65536
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
+  store i32 0, i32* %arrayidx, align 4
+  store i32 1, i32* %arrayidx1, align 4
+  %indvars.iv.next = add nsw i64 %indvars.iv, -1
+  %cmp = icmp sgt i64 %indvars.iv, 0
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+; CHECK-LABEL: @test16(
+; CHECK-NOT: call void @llvm.memset.p0i8.i64
+; CHECK: ret void
+}
+
+; We don't handle memcpy-able loops with negative stride.
+define noalias i32* @test17(i32* nocapture readonly %a, i32 %c) {
+entry:
+  %conv = sext i32 %c to i64
+  %mul = shl nsw i64 %conv, 2
+  %call = tail call noalias i8* @malloc(i64 %mul)
+  %0 = bitcast i8* %call to i32*
+  %tobool.9 = icmp eq i32 %c, 0
+  br i1 %tobool.9, label %while.end, label %while.body.preheader
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %while.body
+  %dec10.in = phi i32 [ %dec10, %while.body ], [ %c, %while.body.preheader ]
+  %dec10 = add nsw i32 %dec10.in, -1
+  %idxprom = sext i32 %dec10 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+  %1 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %0, i64 %idxprom
+  store i32 %1, i32* %arrayidx2, align 4
+  %tobool = icmp eq i32 %dec10, 0
+  br i1 %tobool, label %while.end.loopexit, label %while.body
+
+while.end.loopexit:                               ; preds = %while.body
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  ret i32* %0
+; CHECK-LABEL: @test17(
+; CHECK-NOT: call void @llvm.memcpy
+; CHECK: ret i32*
+}
+
+declare noalias i8* @malloc(i64)
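
As an editorial illustration (not part of the commit), test17 above
corresponds roughly to the following C++; the copy walks backwards through
both arrays, so loop-idiom now deliberately declines to form a memcpy for it:

  #include <cstdlib>

  // Sketch only: both the load and the store have a negative stride.
  int *copy_backwards(const int *a, int c) {
    int *b = static_cast<int *>(std::malloc(sizeof(int) * static_cast<long>(c)));
    while (c != 0) {
      --c;
      b[c] = a[c];
    }
    return b;
  }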
