[PATCH] Vectorize starting from insertelements building a vector

Nadav Rotem nrotem at apple.com
Thu Aug 22 09:15:50 PDT 2013


Hi Matt, 

Thanks for implementing this!  The build_vector parts look great. 

The tests are under the x86 directory because we need a cost model, and your test does not have one. If you want a target-independent test, I suggest that you use the 'threshold' command-line flag to make sure that your test passes on all platforms. You can either set a high number and cross your fingers, or add a new flag that makes "getTreeCost" return a negative number.

Thanks,
Nadav

On Aug 22, 2013, at 3:09 AM, Matt Arsenault <Matthew.Arsenault at amd.com> wrote:

>  The right use check
> 
> Hi nadav,
> 
> http://llvm-reviews.chandlerc.com/D1471
> 
> CHANGE SINCE LAST DIFF
>  http://llvm-reviews.chandlerc.com/D1471?vs=3658&id=3659#toc
> 
> Files:
>  lib/Transforms/Vectorize/SLPVectorizer.cpp
>  test/Transforms/SLPVectorizer/insert-element-build-vector.ll
>  test/Transforms/SLPVectorizer/lit.local.cfg
> 
> Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
> ===================================================================
> --- lib/Transforms/Vectorize/SLPVectorizer.cpp
> +++ lib/Transforms/Vectorize/SLPVectorizer.cpp
> @@ -1869,6 +1869,30 @@
>   return 0;
> }
> 
> +/// \brief Recognize construction of vectors like
> +///  %ra = insertelement <4 x float> undef, float %s0, i32 0
> +///  %rb = insertelement <4 x float> %ra, float %s1, i32 1
> +///  %rc = insertelement <4 x float> %rb, float %s2, i32 2
> +///  %rd = insertelement <4 x float> %rc, float %s3, i32 3
> +///
> +/// Returns true if it matches
> +///
> +static bool findBuildVector(InsertElementInst *IE,
> +                            SmallVectorImpl<Value *> &Ops) {
> +  if (!isa<UndefValue>(IE->getOperand(0)))
> +    return false;
> +
> +  while (IE) {
> +    if (!IE->hasOneUse())
> +      return false;
> +
> +    Ops.push_back(IE->getOperand(1));
> +    IE = dyn_cast<InsertElementInst>(IE->use_back());
> +  }
> +
> +  return true;
> +}
> +
> bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
>   bool Changed = false;
>   SmallVector<Value *, 4> Incoming;
> @@ -1968,6 +1992,21 @@
>       }
>       continue;
>     }
> +
> +    // Try to vectorize trees that start at insertelement instructions.
> +    if (InsertElementInst *IE = dyn_cast<InsertElementInst>(it)) {
> +      SmallVector<Value *, 8> Ops;
> +      if (!findBuildVector(IE, Ops))
> +        continue;
> +
> +      if (tryToVectorizeList(Ops, R)) {
> +        Changed = true;
> +        it = BB->begin();
> +        e = BB->end();
> +      }
> +
> +      continue;
> +    }
>   }
> 
>   return Changed;
> Index: test/Transforms/SLPVectorizer/insert-element-build-vector.ll
> ===================================================================
> --- /dev/null
> +++ test/Transforms/SLPVectorizer/insert-element-build-vector.ll
> @@ -0,0 +1,102 @@
> +; RUN: opt -S -slp-vectorizer < %s | FileCheck %s
> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-n8:16:32:64-S128"
> +
> +; Function Attrs: nounwind ssp uwtable
> +define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
> +; CHECK-LABEL: @simple_select(
> +; CHECK-NEXT: %1 = icmp ne <4 x i32> %c, zeroinitializer
> +; CHECK-NEXT: select <4 x i1> %1, <4 x float> %a, <4 x float> %b
> +  %c0 = extractelement <4 x i32> %c, i32 0
> +  %c1 = extractelement <4 x i32> %c, i32 1
> +  %c2 = extractelement <4 x i32> %c, i32 2
> +  %c3 = extractelement <4 x i32> %c, i32 3
> +  %a0 = extractelement <4 x float> %a, i32 0
> +  %a1 = extractelement <4 x float> %a, i32 1
> +  %a2 = extractelement <4 x float> %a, i32 2
> +  %a3 = extractelement <4 x float> %a, i32 3
> +  %b0 = extractelement <4 x float> %b, i32 0
> +  %b1 = extractelement <4 x float> %b, i32 1
> +  %b2 = extractelement <4 x float> %b, i32 2
> +  %b3 = extractelement <4 x float> %b, i32 3
> +  %cmp0 = icmp ne i32 %c0, 0
> +  %cmp1 = icmp ne i32 %c1, 0
> +  %cmp2 = icmp ne i32 %c2, 0
> +  %cmp3 = icmp ne i32 %c3, 0
> +  %s0 = select i1 %cmp0, float %a0, float %b0
> +  %s1 = select i1 %cmp1, float %a1, float %b1
> +  %s2 = select i1 %cmp2, float %a2, float %b2
> +  %s3 = select i1 %cmp3, float %a3, float %b3
> +  %ra = insertelement <4 x float> undef, float %s0, i32 0
> +  %rb = insertelement <4 x float> %ra, float %s1, i32 1
> +  %rc = insertelement <4 x float> %rb, float %s2, i32 2
> +  %rd = insertelement <4 x float> %rc, float %s3, i32 3
> +  ret <4 x float> %rd
> +}
> +
> +declare void @v4f32_user(<4 x float>) #0
> +declare void @f32_user(float) #0
> +
> +define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
> +; CHECK-LABEL: @simple_select_users(
> +; CHECK-NEXT: %1 = icmp ne <4 x i32> %c, zeroinitializer
> +; CHECK-NEXT: select <4 x i1> %1, <4 x float> %a, <4 x float> %b
> +  %c0 = extractelement <4 x i32> %c, i32 0
> +  %c1 = extractelement <4 x i32> %c, i32 1
> +  %c2 = extractelement <4 x i32> %c, i32 2
> +  %c3 = extractelement <4 x i32> %c, i32 3
> +  %a0 = extractelement <4 x float> %a, i32 0
> +  %a1 = extractelement <4 x float> %a, i32 1
> +  %a2 = extractelement <4 x float> %a, i32 2
> +  %a3 = extractelement <4 x float> %a, i32 3
> +  %b0 = extractelement <4 x float> %b, i32 0
> +  %b1 = extractelement <4 x float> %b, i32 1
> +  %b2 = extractelement <4 x float> %b, i32 2
> +  %b3 = extractelement <4 x float> %b, i32 3
> +  %cmp0 = icmp ne i32 %c0, 0
> +  %cmp1 = icmp ne i32 %c1, 0
> +  %cmp2 = icmp ne i32 %c2, 0
> +  %cmp3 = icmp ne i32 %c3, 0
> +  %s0 = select i1 %cmp0, float %a0, float %b0
> +  %s1 = select i1 %cmp1, float %a1, float %b1
> +  %s2 = select i1 %cmp2, float %a2, float %b2
> +  %s3 = select i1 %cmp3, float %a3, float %b3
> +  %ra = insertelement <4 x float> undef, float %s0, i32 0
> +  %rb = insertelement <4 x float> %ra, float %s1, i32 1
> +  %rc = insertelement <4 x float> %rb, float %s2, i32 2
> +  %rd = insertelement <4 x float> %rc, float %s3, i32 3
> +  call void @v4f32_user(<4 x float> %rd) #0
> +  ret <4 x float> %rd
> +}
> +
> +define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
> +; CHECK-LABEL: @simple_select_users(
> +; CHECK-NEXT: %1 = icmp ne <4 x i32> %c, zeroinitializer
> +; CHECK-NEXT: select <4 x i1> %1, <4 x float> %a, <4 x float> %b
> +  %c0 = extractelement <4 x i32> %c, i32 0
> +  %c1 = extractelement <4 x i32> %c, i32 1
> +  %c2 = extractelement <4 x i32> %c, i32 2
> +  %c3 = extractelement <4 x i32> %c, i32 3
> +  %a0 = extractelement <4 x float> %a, i32 0
> +  %a1 = extractelement <4 x float> %a, i32 1
> +  %a2 = extractelement <4 x float> %a, i32 2
> +  %a3 = extractelement <4 x float> %a, i32 3
> +  %b0 = extractelement <4 x float> %b, i32 0
> +  %b1 = extractelement <4 x float> %b, i32 1
> +  %b2 = extractelement <4 x float> %b, i32 2
> +  %b3 = extractelement <4 x float> %b, i32 3
> +  %cmp0 = icmp ne i32 %c0, 0
> +  %cmp1 = icmp ne i32 %c1, 0
> +  %cmp2 = icmp ne i32 %c2, 0
> +  %cmp3 = icmp ne i32 %c3, 0
> +  %s0 = select i1 %cmp0, float %a0, float %b0
> +  %s1 = select i1 %cmp1, float %a1, float %b1
> +  %s2 = select i1 %cmp2, float %a2, float %b2
> +  %s3 = select i1 %cmp3, float %a3, float %b3
> +  %ra = insertelement <4 x float> undef, float %s0, i32 0
> +  %rb = insertelement <4 x float> %ra, float %s1, i32 1
> +  %rc = insertelement <4 x float> %rb, float %s2, i32 2
> +  %rd = insertelement <4 x float> %rc, float %s3, i32 3
> +  ret <4 x float> zeroinitializer
> +}
> +
> +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
> Index: test/Transforms/SLPVectorizer/lit.local.cfg
> ===================================================================
> --- /dev/null
> +++ test/Transforms/SLPVectorizer/lit.local.cfg
> @@ -0,0 +1 @@
> +config.suffixes = ['.ll']
> <D1471.3.patch>





More information about the llvm-commits mailing list