[llvm] r186088 - LoopVectorize: Vectorize all accesses in address space zero with unit stride

Thu Jul 11 08:28:45 PDT 2013

On Thu, Jul 11, 2013 at 8:21 AM, Arnold Schwaighofer
<aschwaighofer at apple.com> wrote:
> Author: arnolds
> Date: Thu Jul 11 10:21:55 2013
> New Revision: 186088
>
> URL: http://llvm.org/viewvc/llvm-project?rev=186088&view=rev
> Log:
> LoopVectorize: Vectorize all accesses in address space zero with unit stride

Naively (since I know very little about LLVM optimization details,
mostly working up in Clang): do you need to limit this to unit stride?
Any object that would include address zero would be invalid, no? (I'm
not sure whether vectorization can have holes (e.g., elements of size 1
but stride 2); if so, you might need to avoid those. But an access with
stride 2 and size 2 that crosses zero, even if zero isn't one of the
element addresses (instead it's the address of the second byte of an
element), should be eligible for the same optimization as size 1 stride
1, no?)

>
> We can vectorize them because in the case where we wrap in the address space the
> unvectorized code would have had to access a pointer value of zero which is
> undefined behavior in address space zero according to the LLVM IR semantics.
> (Thank you Duncan, for pointing this out to me).
>
> Fixes PR16592.
>
> Added:
>     llvm/trunk/test/Transforms/LoopVectorize/safegep.ll
> Modified:
>     llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
>
> Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=186088&r1=186087&r2=186088&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Thu Jul 11 10:21:55 2013
> @@ -3223,11 +3223,12 @@ static bool isInBoundsGep(Value *Ptr) {
>  /// \brief Check whether the access through \p Ptr has a constant stride.
>  static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value *Ptr,
>                          const Loop *Lp) {
> -  const Type *PtrTy = Ptr->getType();
> -  assert(PtrTy->isPointerTy() && "Unexpected non ptr");
> +  const Type *Ty = Ptr->getType();
> +  assert(Ty->isPointerTy() && "Unexpected non ptr");
>
>    // Make sure that the pointer does not point to aggregate types.
> -  if (cast<PointerType>(Ptr->getType())->getElementType()->isAggregateType()) {
> +  const PointerType *PtrTy = cast<PointerType>(Ty);
> +  if (PtrTy->getElementType()->isAggregateType()) {
>      DEBUG(dbgs() << "LV: Bad stride - Not a pointer to a scalar type" << *Ptr
>            << "\n");
>      return 0;
> @@ -3248,11 +3249,16 @@ static int isStridedPtr(ScalarEvolution
>    }
>
>    // The address calculation must not wrap. Otherwise, a dependence could be
> -  // inverted. An inbounds getelementptr that is a AddRec with a unit stride
> +  // inverted.
> +  // An inbounds getelementptr that is a AddRec with a unit stride
>    // cannot wrap per definition. The unit stride requirement is checked later.
> +  // An getelementptr without an inbounds attribute and unit stride would have
> +  // to access the pointer value "0" which is undefined behavior in address
> +  // space 0, therefore we can also vectorize this case.
>    bool IsInBoundsGEP = isInBoundsGep(Ptr);
>    bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask);
> -  if (!IsNoWrapAddRec && !IsInBoundsGEP) {
> +  bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
> +  if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
>      DEBUG(dbgs() << "LV: Bad stride - Pointer may wrap in the address space "
>            << *Ptr << " SCEV: " << *PtrScev << "\n");
>      return 0;
> @@ -3269,7 +3275,7 @@ static int isStridedPtr(ScalarEvolution
>      return 0;
>    }
>
> -  int64_t Size = DL->getTypeAllocSize(PtrTy->getPointerElementType());
> +  int64_t Size = DL->getTypeAllocSize(PtrTy->getElementType());
>    const APInt &APStepVal = C->getValue()->getValue();
>
>    // Huge step value - give up.
> @@ -3285,8 +3291,10 @@ static int isStridedPtr(ScalarEvolution
>      return 0;
>
>    // If the SCEV could wrap but we have an inbounds gep with a unit stride we
> -  // know we can't "wrap around the address space".
> -  if (!IsNoWrapAddRec && IsInBoundsGEP && Stride != 1 && Stride != -1)
> +  // know we can't "wrap around the address space". In case of address space
> +  // zero we know that this won't happen without triggering undefined behavior.
> +  if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
> +      Stride != 1 && Stride != -1)
>      return 0;
>
>    return Stride;
>
> Added: llvm/trunk/test/Transforms/LoopVectorize/safegep.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/safegep.ll?rev=186088&view=auto
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/safegep.ll (added)
> +++ llvm/trunk/test/Transforms/LoopVectorize/safegep.ll Thu Jul 11 10:21:55 2013
> @@ -0,0 +1,61 @@
> +; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1  < %s |  FileCheck %s
> +target datalayout = "e-p:32:32:32-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f16:16:16-f32:32:32-f64:32:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
> +
> +
> +; We can vectorize this code because if the address computation would wrap then
> +; a load from 0 would take place which is undefined behaviour in address space 0
> +; according to LLVM IR semantics.
> +
> +; PR16592
> +
> +; CHECK: safe
> +; CHECK: <4 x float>
> +
> +define void @safe(float* %A, float* %B, float %K) {
> +entry:
> +  br label %"<bb 3>"
> +
> +"<bb 3>":
> +  %i_15 = phi i32 [ 0, %entry ], [ %i_19, %"<bb 3>" ]
> +  %pp3 = getelementptr float* %A, i32 %i_15
> +  %D.1396_10 = load float* %pp3, align 4
> +  %pp24 = getelementptr float* %B, i32 %i_15
> +  %D.1398_15 = load float* %pp24, align 4
> +  %D.1399_17 = fadd float %D.1398_15, %K
> +  %D.1400_18 = fmul float %D.1396_10, %D.1399_17
> +  store float %D.1400_18, float* %pp3, align 4
> +  %i_19 = add nsw i32 %i_15, 1
> +  %exitcond = icmp ne i32 %i_19, 64
> +  br i1 %exitcond, label %"<bb 3>", label %return
> +
> +return:
> +  ret void
> +}
> +
> +; In a non-default address space we don't have this rule.
> +
> +; CHECK: notsafe
> +; CHECK-NOT: <4 x float>
> +
> +define void @notsafe(float addrspace(5) * %A, float* %B, float %K) {
> +entry:
> +  br label %"<bb 3>"
> +
> +"<bb 3>":
> +  %i_15 = phi i32 [ 0, %entry ], [ %i_19, %"<bb 3>" ]
> +  %pp3 = getelementptr float addrspace(5) * %A, i32 %i_15
> +  %D.1396_10 = load float addrspace(5) * %pp3, align 4
> +  %pp24 = getelementptr float* %B, i32 %i_15
> +  %D.1398_15 = load float* %pp24, align 4
> +  %D.1399_17 = fadd float %D.1398_15, %K
> +  %D.1400_18 = fmul float %D.1396_10, %D.1399_17
> +  store float %D.1400_18, float addrspace(5) * %pp3, align 4
> +  %i_19 = add nsw i32 %i_15, 1
> +  %exitcond = icmp ne i32 %i_19, 64
> +  br i1 %exitcond, label %"<bb 3>", label %return
> +
> +return:
> +  ret void
> +}
> +
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits