[llvm] r337471 - [LoadStoreVectorizer] Use getMinusSCEV() to compute the distance between two pointers.

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 19 11:39:09 PDT 2018


FYI, I replied to the review thread with a few ideas for improvement on 
this patch.


On 07/19/2018 09:50 AM, Farhana Aleen via llvm-commits wrote:
> Author: faaleen
> Date: Thu Jul 19 09:50:27 2018
> New Revision: 337471
>
> URL: http://llvm.org/viewvc/llvm-project?rev=337471&view=rev
> Log:
> [LoadStoreVectorizer] Use getMinusSCEV() to compute the distance between two pointers.
>
> Summary: Currently, isConsecutiveAccess() detects two pointers (PtrA and PtrB) as consecutive by
>           comparing PtrB with BaseDelta + PtrA. This works when both pointers are factorized or
>           when neither is. But isConsecutiveAccess() fails if one of the
>           pointers is factorized and the other one is not.
>
>           Here is an example:
>           PtrA = 4 * (A + B)
>           PtrB = 4 + 4A + 4B
>
>           This patch uses getMinusSCEV() to compute the distance between the two pointers.
>           getMinusSCEV() re-combines the expressions and computes the simplified difference.
>
> Author: FarhanaAleen
>
> Reviewed By: rampitec
>
> Differential Revision: https://reviews.llvm.org/D49516
>
> Added:
>      llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/complex-index.ll
> Modified:
>      llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
>
> Modified: llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp?rev=337471&r1=337470&r2=337471&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp Thu Jul 19 09:50:27 2018
> @@ -340,6 +340,14 @@ bool Vectorizer::isConsecutiveAccess(Val
>     if (X == PtrSCEVB)
>       return true;
>   
> +  // The above check will not catch the cases where one of the pointers is
> +  // factorized but the other one is not, such as (C + (S * (A + B))) vs
> +  // (AS + BS). Get the minus scev. That will allow re-combining the expressions
> +  // and getting the simplified difference.
> +  const SCEV *Dist = SE.getMinusSCEV(PtrSCEVB, PtrSCEVA);
> +  if (C == Dist)
> +    return true;
> +
>     // Sometimes even this doesn't work, because SCEV can't always see through
>     // patterns that look like (gep (ext (add (shl X, C1), C2))). Try checking
>     // things the hard way.
>
> Added: llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/complex-index.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/complex-index.ll?rev=337471&view=auto
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/complex-index.ll (added)
> +++ llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/complex-index.ll Thu Jul 19 09:50:27 2018
> @@ -0,0 +1,49 @@
> +; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -load-store-vectorizer -S -o - %s | FileCheck %s
> +
> +declare i64 @_Z12get_local_idj(i32)
> +
> +declare i64 @_Z12get_group_idj(i32)
> +
> +declare double @llvm.fmuladd.f64(double, double, double)
> +
> +; CHECK-LABEL: @factorizedVsNonfactorizedAccess(
> +; CHECK: load <2 x float>
> +; CHECK: store <2 x float>
> +define amdgpu_kernel void @factorizedVsNonfactorizedAccess(float addrspace(1)* nocapture %c) {
> +entry:
> +  %call = tail call i64 @_Z12get_local_idj(i32 0)
> +  %call1 = tail call i64 @_Z12get_group_idj(i32 0)
> +  %div = lshr i64 %call, 4
> +  %div2 = lshr i64 %call1, 3
> +  %mul = shl i64 %div2, 7
> +  %rem = shl i64 %call, 3
> +  %mul3 = and i64 %rem, 120
> +  %add = or i64 %mul, %mul3
> +  %rem4 = shl i64 %call1, 7
> +  %mul5 = and i64 %rem4, 896
> +  %mul6 = shl nuw nsw i64 %div, 3
> +  %add7 = add nuw i64 %mul5, %mul6
> +  %mul9 = shl i64 %add7, 10
> +  %add10 = add i64 %mul9, %add
> +  %arrayidx = getelementptr inbounds float, float addrspace(1)* %c, i64 %add10
> +  %load1 = load float, float addrspace(1)* %arrayidx, align 4
> +  %conv = fpext float %load1 to double
> +  %mul11 = fmul double %conv, 0x3FEAB481D8F35506
> +  %conv12 = fptrunc double %mul11 to float
> +  %conv18 = fpext float %conv12 to double
> +  %storeval1 = tail call double @llvm.fmuladd.f64(double 0x3FF4FFAFBBEC946A, double 0.000000e+00, double %conv18)
> +  %cstoreval1 = fptrunc double %storeval1 to float
> +  store float %cstoreval1, float addrspace(1)* %arrayidx, align 4
> +
> +  %add23 = or i64 %add10, 1
> +  %arrayidx24 = getelementptr inbounds float, float addrspace(1)* %c, i64 %add23
> +  %load2 = load float, float addrspace(1)* %arrayidx24, align 4
> +  %conv25 = fpext float %load2 to double
> +  %mul26 = fmul double %conv25, 0x3FEAB481D8F35506
> +  %conv27 = fptrunc double %mul26 to float
> +  %conv34 = fpext float %conv27 to double
> +  %storeval2 = tail call double @llvm.fmuladd.f64(double 0x3FF4FFAFBBEC946A, double 0.000000e+00, double %conv34)
> +  %cstoreval2 = fptrunc double %storeval2 to float
> +  store float %cstoreval2, float addrspace(1)* %arrayidx24, align 4
> +  ret void
> +}
> \ No newline at end of file
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
