[llvm] r253091 - [LIR] Add support for creating memcpys from loops with a negative stride.
Chad Rosier via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 13 13:51:02 PST 2015
Author: mcrosier
Date: Fri Nov 13 15:51:02 2015
New Revision: 253091
URL: http://llvm.org/viewvc/llvm-project?rev=253091&view=rev
Log:
[LIR] Add support for creating memcpys from loops with a negative stride.
This allows us to transform the following loop into a memcpy:
void test(unsigned *__restrict__ a, unsigned *__restrict__ b) {
for (int i = 2047; i >= 0; --i) {
a[i] = b[i];
}
}
This is the memcpy counterpart of r251518, which added memset support for
loops with a negative stride.
Modified:
llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
llvm/trunk/test/Transforms/LoopIdiom/basic.ll
Modified: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=253091&r1=253090&r2=253091&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp Fri Nov 13 15:51:02 2015
@@ -129,7 +129,7 @@ private:
bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
const SCEVAddRecExpr *StoreEv,
const SCEVAddRecExpr *LoadEv,
- const SCEV *BECount);
+ const SCEV *BECount, bool NegStride);
/// @}
/// \name Noncountable Loop Idiom Handling
@@ -362,10 +362,6 @@ bool LoopIdiomRecognize::processLoopStor
StoredVal, SI, StoreEv, BECount, NegStride))
return true;
- // TODO: We don't handle negative stride memcpys.
- if (NegStride)
- return false;
-
// If the stored value is a strided load in the same loop with the same stride
// this may be transformable into a memcpy. This kicks in for stuff like
// for (i) A[i] = B[i];
@@ -374,7 +370,8 @@ bool LoopIdiomRecognize::processLoopStor
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple())
- if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
+ if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount,
+ NegStride))
return true;
}
// errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";
@@ -626,7 +623,7 @@ bool LoopIdiomRecognize::processLoopStri
/// same-strided load.
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
StoreInst *SI, unsigned StoreSize, const SCEVAddRecExpr *StoreEv,
- const SCEVAddRecExpr *LoadEv, const SCEV *BECount) {
+ const SCEVAddRecExpr *LoadEv, const SCEV *BECount, bool NegStride) {
// If we're not allowed to form memcpy, we fail.
if (!TLI->has(LibFunc::memcpy))
return false;
@@ -640,6 +637,14 @@ bool LoopIdiomRecognize::processLoopStor
IRBuilder<> Builder(Preheader->getTerminator());
SCEVExpander Expander(*SE, *DL, "loop-idiom");
+ const SCEV *StrStart = StoreEv->getStart();
+ unsigned StrAS = SI->getPointerAddressSpace();
+ Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS);
+
+ // Handle negative strided loops.
+ if (NegStride)
+ StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE);
+
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
@@ -647,8 +652,7 @@ bool LoopIdiomRecognize::processLoopStor
// feeds the stores. Check for an alias by generating the base address and
// checking everything.
Value *StoreBasePtr = Expander.expandCodeFor(
- StoreEv->getStart(), Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
- Preheader->getTerminator());
+ StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
StoreSize, *AA, SI)) {
@@ -658,11 +662,17 @@ bool LoopIdiomRecognize::processLoopStor
return false;
}
+ const SCEV *LdStart = LoadEv->getStart();
+ unsigned LdAS = LI->getPointerAddressSpace();
+
+ // Handle negative strided loops.
+ if (NegStride)
+ LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE);
+
// For a memcpy, we have to make sure that the input array is not being
// mutated by the loop.
Value *LoadBasePtr = Expander.expandCodeFor(
- LoadEv->getStart(), Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
- Preheader->getTerminator());
+ LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
*AA, SI)) {
@@ -677,7 +687,6 @@ bool LoopIdiomRecognize::processLoopStor
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
const SCEV *NumBytesS =
Modified: llvm/trunk/test/Transforms/LoopIdiom/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/basic.ll?rev=253091&r1=253090&r2=253091&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopIdiom/basic.ll (original)
+++ llvm/trunk/test/Transforms/LoopIdiom/basic.ll Fri Nov 13 15:51:02 2015
@@ -469,7 +469,7 @@ for.cond.cleanup:
; CHECK: ret void
}
-; We don't handle memcpy-able loops with negative stride.
+; Handle memcpy-able loops with negative stride.
define noalias i32* @test17(i32* nocapture readonly %a, i32 %c) {
entry:
%conv = sext i32 %c to i64
@@ -499,8 +499,35 @@ while.end.loopexit:
while.end: ; preds = %while.end.loopexit, %entry
ret i32* %0
; CHECK-LABEL: @test17(
-; CHECK-NOT: call void @llvm.memcpy
+; CHECK: call void @llvm.memcpy
; CHECK: ret i32*
}
declare noalias i8* @malloc(i64)
+
+; Handle memcpy-able loops with negative stride.
+; void test18(unsigned *__restrict__ a, unsigned *__restrict__ b) {
+; for (int i = 2047; i >= 0; --i) {
+; a[i] = b[i];
+; }
+; }
+define void @test18(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 2047, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ store i32 %0, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nsw i64 %indvars.iv, -1
+ %cmp = icmp sgt i64 %indvars.iv, 0
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+; CHECK-LABEL: @test18(
+; CHECK: call void @llvm.memcpy
+; CHECK: ret
+}
More information about the llvm-commits
mailing list