[PATCH] Vectorize starting from insertelements building a vector
Matt Arsenault
Matthew.Arsenault at amd.com
Thu Aug 22 03:09:48 PDT 2013
The right use check
Hi nadav,
http://llvm-reviews.chandlerc.com/D1471
CHANGE SINCE LAST DIFF
http://llvm-reviews.chandlerc.com/D1471?vs=3658&id=3659#toc
Files:
lib/Transforms/Vectorize/SLPVectorizer.cpp
test/Transforms/SLPVectorizer/insert-element-build-vector.ll
test/Transforms/SLPVectorizer/lit.local.cfg
Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1869,6 +1869,30 @@
return 0;
}
+/// \brief Recognize construction of vectors like
+/// %ra = insertelement <4 x float> undef, float %s0, i32 0
+/// %rb = insertelement <4 x float> %ra, float %s1, i32 1
+/// %rc = insertelement <4 x float> %rb, float %s2, i32 2
+/// %rd = insertelement <4 x float> %rc, float %s3, i32 3
+///
+/// Returns true if IE starts such a chain; the inserted scalars go into Ops.
+///
+static bool findBuildVector(InsertElementInst *IE,
+ SmallVectorImpl<Value *> &Ops) {
+ if (!isa<UndefValue>(IE->getOperand(0)))
+ return false;
+
+ while (IE) {
+ if (!IE->hasOneUse())
+ return false;
+
+ Ops.push_back(IE->getOperand(1));
+ IE = dyn_cast<InsertElementInst>(IE->use_back());
+ }
+
+ return true;
+}
+
bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
bool Changed = false;
SmallVector<Value *, 4> Incoming;
@@ -1968,6 +1992,21 @@
}
continue;
}
+
+ // Try to vectorize trees that start at insertelement instructions.
+ if (InsertElementInst *IE = dyn_cast<InsertElementInst>(it)) {
+ SmallVector<Value *, 8> Ops;
+ if (!findBuildVector(IE, Ops))
+ continue;
+
+ if (tryToVectorizeList(Ops, R)) {
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
+ }
+
+ continue;
+ }
}
return Changed;
Index: test/Transforms/SLPVectorizer/insert-element-build-vector.ll
===================================================================
--- /dev/null
+++ test/Transforms/SLPVectorizer/insert-element-build-vector.ll
@@ -0,0 +1,102 @@
+; RUN: opt -S -slp-vectorizer < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-n8:16:32:64-S128"
+
+; Function Attrs: nounwind ssp uwtable
+define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @simple_select(
+; CHECK-NEXT: %1 = icmp ne <4 x i32> %c, zeroinitializer
+; CHECK-NEXT: select <4 x i1> %1, <4 x float> %a, <4 x float> %b
+ %c0 = extractelement <4 x i32> %c, i32 0
+ %c1 = extractelement <4 x i32> %c, i32 1
+ %c2 = extractelement <4 x i32> %c, i32 2
+ %c3 = extractelement <4 x i32> %c, i32 3
+ %a0 = extractelement <4 x float> %a, i32 0
+ %a1 = extractelement <4 x float> %a, i32 1
+ %a2 = extractelement <4 x float> %a, i32 2
+ %a3 = extractelement <4 x float> %a, i32 3
+ %b0 = extractelement <4 x float> %b, i32 0
+ %b1 = extractelement <4 x float> %b, i32 1
+ %b2 = extractelement <4 x float> %b, i32 2
+ %b3 = extractelement <4 x float> %b, i32 3
+ %cmp0 = icmp ne i32 %c0, 0
+ %cmp1 = icmp ne i32 %c1, 0
+ %cmp2 = icmp ne i32 %c2, 0
+ %cmp3 = icmp ne i32 %c3, 0
+ %s0 = select i1 %cmp0, float %a0, float %b0
+ %s1 = select i1 %cmp1, float %a1, float %b1
+ %s2 = select i1 %cmp2, float %a2, float %b2
+ %s3 = select i1 %cmp3, float %a3, float %b3
+ %ra = insertelement <4 x float> undef, float %s0, i32 0
+ %rb = insertelement <4 x float> %ra, float %s1, i32 1
+ %rc = insertelement <4 x float> %rb, float %s2, i32 2
+ %rd = insertelement <4 x float> %rc, float %s3, i32 3
+ ret <4 x float> %rd
+}
+
+declare void @v4f32_user(<4 x float>) #0
+declare void @f32_user(float) #0
+
+define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @simple_select_users(
+; CHECK-NEXT: %1 = icmp ne <4 x i32> %c, zeroinitializer
+; CHECK-NEXT: select <4 x i1> %1, <4 x float> %a, <4 x float> %b
+ %c0 = extractelement <4 x i32> %c, i32 0
+ %c1 = extractelement <4 x i32> %c, i32 1
+ %c2 = extractelement <4 x i32> %c, i32 2
+ %c3 = extractelement <4 x i32> %c, i32 3
+ %a0 = extractelement <4 x float> %a, i32 0
+ %a1 = extractelement <4 x float> %a, i32 1
+ %a2 = extractelement <4 x float> %a, i32 2
+ %a3 = extractelement <4 x float> %a, i32 3
+ %b0 = extractelement <4 x float> %b, i32 0
+ %b1 = extractelement <4 x float> %b, i32 1
+ %b2 = extractelement <4 x float> %b, i32 2
+ %b3 = extractelement <4 x float> %b, i32 3
+ %cmp0 = icmp ne i32 %c0, 0
+ %cmp1 = icmp ne i32 %c1, 0
+ %cmp2 = icmp ne i32 %c2, 0
+ %cmp3 = icmp ne i32 %c3, 0
+ %s0 = select i1 %cmp0, float %a0, float %b0
+ %s1 = select i1 %cmp1, float %a1, float %b1
+ %s2 = select i1 %cmp2, float %a2, float %b2
+ %s3 = select i1 %cmp3, float %a3, float %b3
+ %ra = insertelement <4 x float> undef, float %s0, i32 0
+ %rb = insertelement <4 x float> %ra, float %s1, i32 1
+ %rc = insertelement <4 x float> %rb, float %s2, i32 2
+ %rd = insertelement <4 x float> %rc, float %s3, i32 3
+ call void @v4f32_user(<4 x float> %rd) #0
+ ret <4 x float> %rd
+}
+
+define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @simple_select_no_users(
+; CHECK-NEXT: %1 = icmp ne <4 x i32> %c, zeroinitializer
+; CHECK-NEXT: select <4 x i1> %1, <4 x float> %a, <4 x float> %b
+ %c0 = extractelement <4 x i32> %c, i32 0
+ %c1 = extractelement <4 x i32> %c, i32 1
+ %c2 = extractelement <4 x i32> %c, i32 2
+ %c3 = extractelement <4 x i32> %c, i32 3
+ %a0 = extractelement <4 x float> %a, i32 0
+ %a1 = extractelement <4 x float> %a, i32 1
+ %a2 = extractelement <4 x float> %a, i32 2
+ %a3 = extractelement <4 x float> %a, i32 3
+ %b0 = extractelement <4 x float> %b, i32 0
+ %b1 = extractelement <4 x float> %b, i32 1
+ %b2 = extractelement <4 x float> %b, i32 2
+ %b3 = extractelement <4 x float> %b, i32 3
+ %cmp0 = icmp ne i32 %c0, 0
+ %cmp1 = icmp ne i32 %c1, 0
+ %cmp2 = icmp ne i32 %c2, 0
+ %cmp3 = icmp ne i32 %c3, 0
+ %s0 = select i1 %cmp0, float %a0, float %b0
+ %s1 = select i1 %cmp1, float %a1, float %b1
+ %s2 = select i1 %cmp2, float %a2, float %b2
+ %s3 = select i1 %cmp3, float %a3, float %b3
+ %ra = insertelement <4 x float> undef, float %s0, i32 0
+ %rb = insertelement <4 x float> %ra, float %s1, i32 1
+ %rc = insertelement <4 x float> %rb, float %s2, i32 2
+ %rd = insertelement <4 x float> %rc, float %s3, i32 3
+ ret <4 x float> zeroinitializer
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
Index: test/Transforms/SLPVectorizer/lit.local.cfg
===================================================================
--- /dev/null
+++ test/Transforms/SLPVectorizer/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D1471.3.patch
Type: text/x-patch
Size: 6764 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130822/644c10d5/attachment.bin>
More information about the llvm-commits
mailing list