[llvm] r273864 - Fixed consecutive memory access detection in Loop Vectorizer.
Elena Demikhovsky via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 27 04:19:26 PDT 2016
Author: delena
Date: Mon Jun 27 06:19:23 2016
New Revision: 273864
URL: http://llvm.org/viewvc/llvm-project?rev=273864&view=rev
Log:
Fixed consecutive memory access detection in Loop Vectorizer.
It did not handle correctly cases without GEP.
The following loop wasn't vectorized:
for (int i=0; i<len; i++)
*to++ = *from++;
I use getPtrStride() to find Stride for memory access and return 0 is the Stride is not 1 or -1.
Re-commit rL273257 - revision: http://reviews.llvm.org/D20789
Added:
llvm/trunk/test/Transforms/LoopVectorize/consec_no_gep.ll
- copied unchanged from r273257, llvm/trunk/test/Transforms/LoopVectorize/consec_no_gep.ll
llvm/trunk/test/Transforms/LoopVectorize/consec_no_gep2.ll
Modified:
llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h
llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp
llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/trunk/test/Transforms/LoopVectorize/ptr-induction.ll
Modified: llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h?rev=273864&r1=273863&r2=273864&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h (original)
+++ llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h Mon Jun 27 06:19:23 2016
@@ -679,9 +679,11 @@ const SCEV *replaceSymbolicStrideSCEV(Pr
/// to \p PtrToStride and therefore add further predicates to \p PSE.
/// The \p Assume parameter indicates if we are allowed to make additional
/// run-time assumptions.
+/// The \p ShouldCheckWrap indicates that we should ensure that address
+/// calculation does not wrap.
int getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
const ValueToValueMap &StridesMap = ValueToValueMap(),
- bool Assume = false);
+ bool Assume = false, bool ShouldCheckWrap = true);
/// \brief Returns true if the memory operations \p A and \p B are consecutive.
/// This is a simple API that does not depend on the analysis pass.
Modified: llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp?rev=273864&r1=273863&r2=273864&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp (original)
+++ llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp Mon Jun 27 06:19:23 2016
@@ -866,7 +866,7 @@ static bool isNoWrapAddRec(Value *Ptr, c
/// \brief Check whether the access through \p Ptr has a constant stride.
int llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
const Loop *Lp, const ValueToValueMap &StridesMap,
- bool Assume) {
+ bool Assume, bool ShouldCheckWrap) {
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
@@ -905,9 +905,9 @@ int llvm::getPtrStride(PredicatedScalarE
// to access the pointer value "0" which is undefined behavior in address
// space 0, therefore we can also vectorize this case.
bool IsInBoundsGEP = isInBoundsGep(Ptr);
- bool IsNoWrapAddRec =
- PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
- isNoWrapAddRec(Ptr, AR, PSE, Lp);
+ bool IsNoWrapAddRec = !ShouldCheckWrap ||
+ PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
+ isNoWrapAddRec(Ptr, AR, PSE, Lp);
bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
if (Assume) {
Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=273864&r1=273863&r2=273864&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Mon Jun 27 06:19:23 2016
@@ -2242,87 +2242,13 @@ Value *InnerLoopVectorizer::getStepVecto
}
int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
- assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
- auto *SE = PSE.getSE();
- // Make sure that the pointer does not point to structs.
- if (Ptr->getType()->getPointerElementType()->isAggregateType())
- return 0;
-
- // If this value is a pointer induction variable, we know it is consecutive.
- PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
- if (Phi && Inductions.count(Phi)) {
- InductionDescriptor II = Inductions[Phi];
- return II.getConsecutiveDirection();
- }
-
- GetElementPtrInst *Gep = getGEPInstruction(Ptr);
- if (!Gep)
- return 0;
-
- unsigned NumOperands = Gep->getNumOperands();
- Value *GpPtr = Gep->getPointerOperand();
- // If this GEP value is a consecutive pointer induction variable and all of
- // the indices are constant, then we know it is consecutive.
- Phi = dyn_cast<PHINode>(GpPtr);
- if (Phi && Inductions.count(Phi)) {
-
- // Make sure that the pointer does not point to structs.
- PointerType *GepPtrType = cast<PointerType>(GpPtr->getType());
- if (GepPtrType->getElementType()->isAggregateType())
- return 0;
-
- // Make sure that all of the index operands are loop invariant.
- for (unsigned i = 1; i < NumOperands; ++i)
- if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
- return 0;
-
- InductionDescriptor II = Inductions[Phi];
- return II.getConsecutiveDirection();
- }
- unsigned InductionOperand = getGEPInductionOperand(Gep);
-
- // Check that all of the gep indices are uniform except for our induction
- // operand.
- for (unsigned i = 0; i != NumOperands; ++i)
- if (i != InductionOperand &&
- !SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
- return 0;
-
- // We can emit wide load/stores only if the last non-zero index is the
- // induction variable.
- const SCEV *Last = nullptr;
- if (!getSymbolicStrides() || !getSymbolicStrides()->count(Gep))
- Last = PSE.getSCEV(Gep->getOperand(InductionOperand));
- else {
- // Because of the multiplication by a stride we can have a s/zext cast.
- // We are going to replace this stride by 1 so the cast is safe to ignore.
- //
- // %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- // %0 = trunc i64 %indvars.iv to i32
- // %mul = mul i32 %0, %Stride1
- // %idxprom = zext i32 %mul to i64 << Safe cast.
- // %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom
- //
- Last = replaceSymbolicStrideSCEV(PSE, *getSymbolicStrides(),
- Gep->getOperand(InductionOperand), Gep);
- if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last))
- Last =
- (C->getSCEVType() == scSignExtend || C->getSCEVType() == scZeroExtend)
- ? C->getOperand()
- : Last;
- }
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
- const SCEV *Step = AR->getStepRecurrence(*SE);
-
- // The memory is consecutive because the last index is consecutive
- // and all other indices are loop invariant.
- if (Step->isOne())
- return 1;
- if (Step->isAllOnesValue())
- return -1;
- }
+ const ValueToValueMap &Strides = getSymbolicStrides() ? *getSymbolicStrides() :
+ ValueToValueMap();
+ int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
+ if (Stride == 1 || Stride == -1)
+ return Stride;
return 0;
}
@@ -2658,7 +2584,9 @@ void InnerLoopVectorizer::vectorizeMemor
// Handle consecutive loads/stores.
GetElementPtrInst *Gep = getGEPInstruction(Ptr);
if (ConsecutiveStride) {
- if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) {
+ if (Gep &&
+ !PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()),
+ OrigLoop)) {
setDebugLocFromInst(Builder, Gep);
Value *PtrOperand = Gep->getPointerOperand();
Value *FirstBasePtr = getVectorValue(PtrOperand)[0];
@@ -2671,9 +2599,6 @@ void InnerLoopVectorizer::vectorizeMemor
Ptr = Builder.Insert(Gep2);
} else if (Gep) {
setDebugLocFromInst(Builder, Gep);
- assert(PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()),
- OrigLoop) &&
- "Base ptr must be invariant");
// The last index does not have to be the induction. It can be
// consecutive and be a function of the index. For example A[I+1];
unsigned NumOperands = Gep->getNumOperands();
@@ -2702,8 +2627,6 @@ void InnerLoopVectorizer::vectorizeMemor
}
Ptr = Builder.Insert(Gep2);
} else { // No GEP
- // Use the induction element ptr.
- assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
setDebugLocFromInst(Builder, Ptr);
VectorParts &PtrVal = getVectorValue(Ptr);
Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
Added: llvm/trunk/test/Transforms/LoopVectorize/consec_no_gep2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/consec_no_gep2.ll?rev=273864&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/consec_no_gep2.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/consec_no_gep2.ll Mon Jun 27 06:19:23 2016
@@ -0,0 +1,34 @@
+; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; CHECK-LABEL: @img2buf
+; CHECK: store <4 x i32>
+; Function Attrs: nounwind
+define void @img2buf(i64 %val, i8* nocapture %buf, i32 %N) local_unnamed_addr #0 {
+entry:
+ br label %l2
+
+l2:
+ br label %for.body57.us
+
+for.body57.us:
+ %indvars.iv24 = phi i64 [ %val, %l2 ], [ %indvars.iv.next25, %for.body57.us ]
+ %0 = trunc i64 %indvars.iv24 to i32
+ %add77.us = add i32 5, %0
+ %mul78.us = shl nsw i32 %add77.us, 2
+ %idx.ext79.us = sext i32 %mul78.us to i64
+ %add.ptr80.us = getelementptr inbounds i8, i8* %buf, i64 %idx.ext79.us
+ %ui32.0.add.ptr80.sroa_cast.us = bitcast i8* %add.ptr80.us to i32*
+ store i32 0, i32* %ui32.0.add.ptr80.sroa_cast.us, align 1
+ %indvars.iv.next25 = add nsw i64 %indvars.iv24, 1
+ %lftr.wideiv26 = trunc i64 %indvars.iv.next25 to i32
+ %exitcond27 = icmp eq i32 %lftr.wideiv26, %N
+ br i1 %exitcond27, label %l3, label %for.body57.us
+
+l3:
+ ret void
+}
+
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-qpx,-vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
Modified: llvm/trunk/test/Transforms/LoopVectorize/ptr-induction.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/ptr-induction.ll?rev=273864&r1=273863&r2=273864&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/ptr-induction.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/ptr-induction.ll Mon Jun 27 06:19:23 2016
@@ -18,6 +18,7 @@ while.body.preheader:
while.body: ; preds = %while.body.preheader, %while.body
%a.pn = phi i32* [ %incdec.ptr8, %while.body ], [ %a, %while.body.preheader ]
%acc.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
+ %a1.pn = getelementptr inbounds i32, i32* %a.pn, i64 0
%incdec.ptr8 = getelementptr inbounds i32, i32* %a.pn, i64 1
%0 = load i32, i32* %incdec.ptr8, align 1
%add = add nuw nsw i32 %0, %acc.07
More information about the llvm-commits
mailing list