[PATCH] Vectorize starting from insertelements building a vector
Nadav Rotem
nrotem at apple.com
Thu Aug 22 09:15:50 PDT 2013
Hi Matt,
Thanks for implementing this! The build_vector parts look great.
The tests are under the x86 directory because we need a cost model, and your test does not have one. If you want a target-independent test, then I suggest that you use the 'threshold' command line flag to make sure that your test passes on all platforms. You can either set a high number and cross your fingers, or add a new flag that makes "getTreeCost" return a negative number.
Thanks,
Nadav
On Aug 22, 2013, at 3:09 AM, Matt Arsenault <Matthew.Arsenault at amd.com> wrote:
> The right use check
>
> Hi nadav,
>
> http://llvm-reviews.chandlerc.com/D1471
>
> CHANGE SINCE LAST DIFF
> http://llvm-reviews.chandlerc.com/D1471?vs=3658&id=3659#toc
>
> Files:
> lib/Transforms/Vectorize/SLPVectorizer.cpp
> test/Transforms/SLPVectorizer/insert-element-build-vector.ll
> test/Transforms/SLPVectorizer/lit.local.cfg
>
> Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
> ===================================================================
> --- lib/Transforms/Vectorize/SLPVectorizer.cpp
> +++ lib/Transforms/Vectorize/SLPVectorizer.cpp
> @@ -1869,6 +1869,30 @@
> return 0;
> }
>
> +/// \brief Recognize construction of vectors like
> +/// %ra = insertelement <4 x float> undef, float %s0, i32 0
> +/// %rb = insertelement <4 x float> %ra, float %s1, i32 1
> +/// %rc = insertelement <4 x float> %rb, float %s2, i32 2
> +/// %rd = insertelement <4 x float> %rc, float %s3, i32 3
> +///
> +/// Returns true if it matches
> +///
> +static bool findBuildVector(InsertElementInst *IE,
> + SmallVectorImpl<Value *> &Ops) {
> + if (!isa<UndefValue>(IE->getOperand(0)))
> + return false;
> +
> + while (IE) {
> + if (!IE->hasOneUse())
> + return false;
> +
> + Ops.push_back(IE->getOperand(1));
> + IE = dyn_cast<InsertElementInst>(IE->use_back());
> + }
> +
> + return true;
> +}
> +
> bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
> bool Changed = false;
> SmallVector<Value *, 4> Incoming;
> @@ -1968,6 +1992,21 @@
> }
> continue;
> }
> +
> + // Try to vectorize trees that start at insertelement instructions.
> + if (InsertElementInst *IE = dyn_cast<InsertElementInst>(it)) {
> + SmallVector<Value *, 8> Ops;
> + if (!findBuildVector(IE, Ops))
> + continue;
> +
> + if (tryToVectorizeList(Ops, R)) {
> + Changed = true;
> + it = BB->begin();
> + e = BB->end();
> + }
> +
> + continue;
> + }
> }
>
> return Changed;
> Index: test/Transforms/SLPVectorizer/insert-element-build-vector.ll
> ===================================================================
> --- /dev/null
> +++ test/Transforms/SLPVectorizer/insert-element-build-vector.ll
> @@ -0,0 +1,102 @@
> +; RUN: opt -S -slp-vectorizer < %s | FileCheck %s
> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-n8:16:32:64-S128"
> +
> +; Function Attrs: nounwind ssp uwtable
> +define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
> +; CHECK-LABEL: @simple_select(
> +; CHECK-NEXT: %1 = icmp ne <4 x i32> %c, zeroinitializer
> +; CHECK-NEXT: select <4 x i1> %1, <4 x float> %a, <4 x float> %b
> + %c0 = extractelement <4 x i32> %c, i32 0
> + %c1 = extractelement <4 x i32> %c, i32 1
> + %c2 = extractelement <4 x i32> %c, i32 2
> + %c3 = extractelement <4 x i32> %c, i32 3
> + %a0 = extractelement <4 x float> %a, i32 0
> + %a1 = extractelement <4 x float> %a, i32 1
> + %a2 = extractelement <4 x float> %a, i32 2
> + %a3 = extractelement <4 x float> %a, i32 3
> + %b0 = extractelement <4 x float> %b, i32 0
> + %b1 = extractelement <4 x float> %b, i32 1
> + %b2 = extractelement <4 x float> %b, i32 2
> + %b3 = extractelement <4 x float> %b, i32 3
> + %cmp0 = icmp ne i32 %c0, 0
> + %cmp1 = icmp ne i32 %c1, 0
> + %cmp2 = icmp ne i32 %c2, 0
> + %cmp3 = icmp ne i32 %c3, 0
> + %s0 = select i1 %cmp0, float %a0, float %b0
> + %s1 = select i1 %cmp1, float %a1, float %b1
> + %s2 = select i1 %cmp2, float %a2, float %b2
> + %s3 = select i1 %cmp3, float %a3, float %b3
> + %ra = insertelement <4 x float> undef, float %s0, i32 0
> + %rb = insertelement <4 x float> %ra, float %s1, i32 1
> + %rc = insertelement <4 x float> %rb, float %s2, i32 2
> + %rd = insertelement <4 x float> %rc, float %s3, i32 3
> + ret <4 x float> %rd
> +}
> +
> +declare void @v4f32_user(<4 x float>) #0
> +declare void @f32_user(float) #0
> +
> +define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
> +; CHECK-LABEL: @simple_select_users(
> +; CHECK-NEXT: %1 = icmp ne <4 x i32> %c, zeroinitializer
> +; CHECK-NEXT: select <4 x i1> %1, <4 x float> %a, <4 x float> %b
> + %c0 = extractelement <4 x i32> %c, i32 0
> + %c1 = extractelement <4 x i32> %c, i32 1
> + %c2 = extractelement <4 x i32> %c, i32 2
> + %c3 = extractelement <4 x i32> %c, i32 3
> + %a0 = extractelement <4 x float> %a, i32 0
> + %a1 = extractelement <4 x float> %a, i32 1
> + %a2 = extractelement <4 x float> %a, i32 2
> + %a3 = extractelement <4 x float> %a, i32 3
> + %b0 = extractelement <4 x float> %b, i32 0
> + %b1 = extractelement <4 x float> %b, i32 1
> + %b2 = extractelement <4 x float> %b, i32 2
> + %b3 = extractelement <4 x float> %b, i32 3
> + %cmp0 = icmp ne i32 %c0, 0
> + %cmp1 = icmp ne i32 %c1, 0
> + %cmp2 = icmp ne i32 %c2, 0
> + %cmp3 = icmp ne i32 %c3, 0
> + %s0 = select i1 %cmp0, float %a0, float %b0
> + %s1 = select i1 %cmp1, float %a1, float %b1
> + %s2 = select i1 %cmp2, float %a2, float %b2
> + %s3 = select i1 %cmp3, float %a3, float %b3
> + %ra = insertelement <4 x float> undef, float %s0, i32 0
> + %rb = insertelement <4 x float> %ra, float %s1, i32 1
> + %rc = insertelement <4 x float> %rb, float %s2, i32 2
> + %rd = insertelement <4 x float> %rc, float %s3, i32 3
> + call void @v4f32_user(<4 x float> %rd) #0
> + ret <4 x float> %rd
> +}
> +
> +define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
> +; CHECK-LABEL: @simple_select_users(
> +; CHECK-NEXT: %1 = icmp ne <4 x i32> %c, zeroinitializer
> +; CHECK-NEXT: select <4 x i1> %1, <4 x float> %a, <4 x float> %b
> + %c0 = extractelement <4 x i32> %c, i32 0
> + %c1 = extractelement <4 x i32> %c, i32 1
> + %c2 = extractelement <4 x i32> %c, i32 2
> + %c3 = extractelement <4 x i32> %c, i32 3
> + %a0 = extractelement <4 x float> %a, i32 0
> + %a1 = extractelement <4 x float> %a, i32 1
> + %a2 = extractelement <4 x float> %a, i32 2
> + %a3 = extractelement <4 x float> %a, i32 3
> + %b0 = extractelement <4 x float> %b, i32 0
> + %b1 = extractelement <4 x float> %b, i32 1
> + %b2 = extractelement <4 x float> %b, i32 2
> + %b3 = extractelement <4 x float> %b, i32 3
> + %cmp0 = icmp ne i32 %c0, 0
> + %cmp1 = icmp ne i32 %c1, 0
> + %cmp2 = icmp ne i32 %c2, 0
> + %cmp3 = icmp ne i32 %c3, 0
> + %s0 = select i1 %cmp0, float %a0, float %b0
> + %s1 = select i1 %cmp1, float %a1, float %b1
> + %s2 = select i1 %cmp2, float %a2, float %b2
> + %s3 = select i1 %cmp3, float %a3, float %b3
> + %ra = insertelement <4 x float> undef, float %s0, i32 0
> + %rb = insertelement <4 x float> %ra, float %s1, i32 1
> + %rc = insertelement <4 x float> %rb, float %s2, i32 2
> + %rd = insertelement <4 x float> %rc, float %s3, i32 3
> + ret <4 x float> zeroinitializer
> +}
> +
> +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
> Index: test/Transforms/SLPVectorizer/lit.local.cfg
> ===================================================================
> --- /dev/null
> +++ test/Transforms/SLPVectorizer/lit.local.cfg
> @@ -0,0 +1 @@
> +config.suffixes = ['.ll']
> <D1471.3.patch>
More information about the llvm-commits
mailing list