[PATCH] D29900: [SLP] Fix for PR31879: vectorize repeated scalar ops that don't get put back into a vector
Alexey Bataev via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 14 07:32:28 PST 2017
This revision was automatically updated to reflect the committed changes.
Closed by commit rL295056: [SLP] Fix for PR31879: vectorize repeated scalar ops that don't get put back into a vector (authored by ABataev).
Changed prior to commit:
https://reviews.llvm.org/D29900?vs=88357&id=88373#toc
Repository:
rL LLVM
https://reviews.llvm.org/D29900
Files:
llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/trunk/test/Transforms/SLPVectorizer/X86/extractelement.ll
Index: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1660,7 +1660,13 @@
int DeadCost = 0;
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
Instruction *E = cast<Instruction>(VL[i]);
- if (E->hasOneUse())
+ // If all users are going to be vectorized, instruction can be
+ // considered as dead.
+ // The same, if have only one user, it will be vectorized for sure.
+ if (E->hasOneUse() ||
+ std::all_of(E->user_begin(), E->user_end(), [this](User *U) {
+ return ScalarToTreeEntry.count(U) > 0;
+ }))
// Take credit for instruction that will become dead.
DeadCost +=
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
Index: llvm/trunk/test/Transforms/SLPVectorizer/X86/extractelement.ll
===================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/extractelement.ll
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/extractelement.ll
@@ -7,11 +7,10 @@
define float @f(<2 x float> %x) {
; CHECK-LABEL: @f(
-; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
-; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1
-; CHECK-NEXT: [[X0X0:%.*]] = fmul float [[X0]], [[X0]]
-; CHECK-NEXT: [[X1X1:%.*]] = fmul float [[X1]], [[X1]]
-; CHECK-NEXT: [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]]
+; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret float [[ADD]]
;
%x0 = extractelement <2 x float> %x, i32 0
@@ -24,13 +23,13 @@
define float @f_used_out_of_tree(<2 x float> %x) {
; THRESH2-LABEL: @f_used_out_of_tree(
-; THRESH2-NEXT: [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
-; THRESH2-NEXT: [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1
-; THRESH2-NEXT: [[X0X0:%.*]] = fmul float [[X0]], [[X0]]
-; THRESH2-NEXT: [[X1X1:%.*]] = fmul float [[X1]], [[X1]]
-; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]]
+; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
+; THRESH2-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X]], [[X]]
+; THRESH2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
+; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
+; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
; THRESH2-NEXT: store float [[ADD]], float* @a
-; THRESH2-NEXT: ret float [[X0]]
+; THRESH2-NEXT: ret float [[TMP1]]
;
%x0 = extractelement <2 x float> %x, i32 0
%x1 = extractelement <2 x float> %x, i32 1
@@ -43,12 +42,15 @@
define float @f_used_twice_in_tree(<2 x float> %x) {
; THRESH1-LABEL: @f_used_twice_in_tree(
-; THRESH1-NEXT: [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
-; THRESH1-NEXT: [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1
-; THRESH1-NEXT: [[X0X0:%.*]] = fmul float [[X0]], [[X1]]
-; THRESH1-NEXT: [[X1X1:%.*]] = fmul float [[X1]], [[X1]]
-; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]]
+; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
+; THRESH1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
+; THRESH1-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1
+; THRESH1-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[X]], [[TMP3]]
+; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
+; THRESH1-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
+; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]]
; THRESH1-NEXT: ret float [[ADD]]
+;
%x0 = extractelement <2 x float> %x, i32 0
%x1 = extractelement <2 x float> %x, i32 1
%x0x0 = fmul float %x0, %x1
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D29900.88373.patch
Type: text/x-patch
Size: 4225 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170214/3de9f827/attachment.bin>
More information about the llvm-commits mailing list