[llvm] r186088 - LoopVectorize: Vectorize all accesses in address space zero with unit stride

Thu Jul 11 09:08:04 PDT 2013

On Jul 11, 2013 9:02 AM, "Arnold Schwaighofer" <aschwaighofer at apple.com>
wrote:
>
>
> On Jul 11, 2013, at 10:28 AM, David Blaikie <dblaikie at gmail.com> wrote:
>
> > On Thu, Jul 11, 2013 at 8:21 AM, Arnold Schwaighofer
> > <aschwaighofer at apple.com> wrote:
> >> Author: arnolds
> >> Date: Thu Jul 11 10:21:55 2013
> >> New Revision: 186088
> >>
> >> URL: http://llvm.org/viewvc/llvm-project?rev=186088&view=rev
> >> Log:
> >> LoopVectorize: Vectorize all accesses in address space zero with unit
stride
> >
> > Naively (since I know very little about LLVM optimization details,
> > mostly working up in Clang): do you need to limit this to unit stride?
> > Any object that would include address zero would be invalid, no? (I'm
> > not sure whether vectorization can have holes (eg: elements of size 1
> > but stride 2),
>
> In principle the vectorizer can vectorize this with a gather/scatter. In
many cases the cost model will tell it not to.
>
> In this context the “stride” already has the element size factored in.
>
> > if so you might need to avoid those - but if it has
> > stride 2 and size 2
>
> This is a unit stride. The unit stride I am referring to is taking the
element size into account.
>
> > and crosses zero even if zero isn't one of the
> > element addresses (instead it's the address of the second byte of an
> > element) should be eligible for the same optimization as size 1 stride
> > 1, no?
>
> Yes, you are right. The code already does that.
>
> for i in ..: a[i] is a unit stride access.
> while
> for i in ..: a[2*i] is a non-unit stride access (“stride 2”)
>
> irrespective of the type of a.

Ah, OK. Thanks for the explanation & sorry for the noise.

>
>
>
> >
> >>
> >> We can vectorize them because in the case where we wrap in the address
space the
> >> unvectorized code would have had to access a pointer value of zero
which is
> >> undefined behavior in address space zero according to the LLVM IR
semantics.
> >> (Thank you Duncan, for pointing this out to me).
> >>
> >> Fixes PR16592.
> >>
> >> Added:
> >>    llvm/trunk/test/Transforms/LoopVectorize/safegep.ll
> >> Modified:
> >>    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
> >>
> >> Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
> >> URL:
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=186088&r1=186087&r2=186088&view=diff
> >>
==============================================================================
> >> --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
> >> +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Thu Jul 11
10:21:55 2013
> >> @@ -3223,11 +3223,12 @@ static bool isInBoundsGep(Value *Ptr) {
> >> /// \brief Check whether the access through \p Ptr has a constant
stride.
> >> static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value
*Ptr,
> >>                         const Loop *Lp) {
> >> -  const Type *PtrTy = Ptr->getType();
> >> -  assert(PtrTy->isPointerTy() && "Unexpected non ptr");
> >> +  const Type *Ty = Ptr->getType();
> >> +  assert(Ty->isPointerTy() && "Unexpected non ptr");
> >>
> >>   // Make sure that the pointer does not point to aggregate types.
> >> -  if
(cast<PointerType>(Ptr->getType())->getElementType()->isAggregateType()) {
> >> +  const PointerType *PtrTy = cast<PointerType>(Ty);
> >> +  if (PtrTy->getElementType()->isAggregateType()) {
> >>     DEBUG(dbgs() << "LV: Bad stride - Not a pointer to a scalar type"
<< *Ptr
> >>           << "\n");
> >>     return 0;
> >> @@ -3248,11 +3249,16 @@ static int isStridedPtr(ScalarEvolution
> >>   }
> >>
> >>   // The address calculation must not wrap. Otherwise, a dependence
could be
> >> -  // inverted. An inbounds getelementptr that is a AddRec with a unit
stride
> >> +  // inverted.
> >> +  // An inbounds getelementptr that is a AddRec with a unit stride
> >>   // cannot wrap per definition. The unit stride requirement is
checked later.
> >> +  // An getelementptr without an inbounds attribute and unit stride
would have
> >> +  // to access the pointer value "0" which is undefined behavior in
address
> >> +  // space 0, therefore we can also vectorize this case.
> >>   bool IsInBoundsGEP = isInBoundsGep(Ptr);
> >>   bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask);
> >> -  if (!IsNoWrapAddRec && !IsInBoundsGEP) {
> >> +  bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
> >> +  if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
> >>     DEBUG(dbgs() << "LV: Bad stride - Pointer may wrap in the address
space "
> >>           << *Ptr << " SCEV: " << *PtrScev << "\n");
> >>     return 0;
> >> @@ -3269,7 +3275,7 @@ static int isStridedPtr(ScalarEvolution
> >>     return 0;
> >>   }
> >>
> >> -  int64_t Size = DL->getTypeAllocSize(PtrTy->getPointerElementType());
> >> +  int64_t Size = DL->getTypeAllocSize(PtrTy->getElementType());
> >>   const APInt &APStepVal = C->getValue()->getValue();
> >>
> >>   // Huge step value - give up.
> >> @@ -3285,8 +3291,10 @@ static int isStridedPtr(ScalarEvolution
> >>     return 0;
> >>
> >>   // If the SCEV could wrap but we have an inbounds gep with a unit
stride we
> >> -  // know we can't "wrap around the address space".
> >> -  if (!IsNoWrapAddRec && IsInBoundsGEP && Stride != 1 && Stride != -1)
> >> +  // know we can't "wrap around the address space". In case of
address space
> >> +  // zero we know that this won't happen without triggering undefined
behavior.
> >> +  if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
> >> +      Stride != 1 && Stride != -1)
> >>     return 0;
> >>
> >>   return Stride;
> >>
> >> Added: llvm/trunk/test/Transforms/LoopVectorize/safegep.ll
> >> URL:
http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/safegep.ll?rev=186088&view=auto
> >>
==============================================================================
> >> --- llvm/trunk/test/Transforms/LoopVectorize/safegep.ll (added)
> >> +++ llvm/trunk/test/Transforms/LoopVectorize/safegep.ll Thu Jul 11
10:21:55 2013
> >> @@ -0,0 +1,61 @@
> >> +; RUN: opt -S -loop-vectorize -force-vector-width=4
-force-vector-unroll=1  < %s |  FileCheck %s
> >> +target datalayout =
"e-p:32:32:32-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f16:16:16-f32:32:32-f64:32:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
> >> +
> >> +
> >> +; We can vectorize this code because if the address computation would
wrap then
> >> +; a load from 0 would take place which is undefined behaviour in
address space 0
> >> +; according to LLVM IR semantics.
> >> +
> >> +; PR16592
> >> +
> >> +; CHECK: safe
> >> +; CHECK: <4 x float>
> >> +
> >> +define void @safe(float* %A, float* %B, float %K) {
> >> +entry:
> >> +  br label %"<bb 3>"
> >> +
> >> +"<bb 3>":
> >> +  %i_15 = phi i32 [ 0, %entry ], [ %i_19, %"<bb 3>" ]
> >> +  %pp3 = getelementptr float* %A, i32 %i_15
> >> +  %D.1396_10 = load float* %pp3, align 4
> >> +  %pp24 = getelementptr float* %B, i32 %i_15
> >> +  %D.1398_15 = load float* %pp24, align 4
> >> +  %D.1399_17 = fadd float %D.1398_15, %K
> >> +  %D.1400_18 = fmul float %D.1396_10, %D.1399_17
> >> +  store float %D.1400_18, float* %pp3, align 4
> >> +  %i_19 = add nsw i32 %i_15, 1
> >> +  %exitcond = icmp ne i32 %i_19, 64
> >> +  br i1 %exitcond, label %"<bb 3>", label %return
> >> +
> >> +return:
> >> +  ret void
> >> +}
> >> +
> >> +; In a non-default address space we don't have this rule.
> >> +
> >> +; CHECK: notsafe
> >> +; CHECK-NOT: <4 x float>
> >> +
> >> +define void @notsafe(float addrspace(5) * %A, float* %B, float %K) {
> >> +entry:
> >> +  br label %"<bb 3>"
> >> +
> >> +"<bb 3>":
> >> +  %i_15 = phi i32 [ 0, %entry ], [ %i_19, %"<bb 3>" ]
> >> +  %pp3 = getelementptr float addrspace(5) * %A, i32 %i_15
> >> +  %D.1396_10 = load float addrspace(5) * %pp3, align 4
> >> +  %pp24 = getelementptr float* %B, i32 %i_15
> >> +  %D.1398_15 = load float* %pp24, align 4
> >> +  %D.1399_17 = fadd float %D.1398_15, %K
> >> +  %D.1400_18 = fmul float %D.1396_10, %D.1399_17
> >> +  store float %D.1400_18, float addrspace(5) * %pp3, align 4
> >> +  %i_19 = add nsw i32 %i_15, 1
> >> +  %exitcond = icmp ne i32 %i_19, 64
> >> +  br i1 %exitcond, label %"<bb 3>", label %return
> >> +
> >> +return:
> >> +  ret void
> >> +}
> >> +
> >> +
> >>
> >>
> >> _______________________________________________
> >> llvm-commits mailing list
> >> llvm-commits at cs.uiuc.edu
> >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130711/58a99a29/attachment.html>