[PATCH] D29900: [SLP] Fix for PR31879: vectorize repeated scalar ops that don't get put back into a vector

Alexey Bataev via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 14 05:26:09 PST 2017


ABataev updated this revision to Diff 88357.
ABataev added a comment.

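Consider extractelement instructions dead (and take cost credit for them) not only when they have a single use, but also when all of their users are going to be vectorized, i.e. are already part of the vectorization tree.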


https://reviews.llvm.org/D29900

Files:
  lib/Transforms/Vectorize/SLPVectorizer.cpp
  test/Transforms/SLPVectorizer/X86/extractelement.ll


Index: test/Transforms/SLPVectorizer/X86/extractelement.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/extractelement.ll
+++ test/Transforms/SLPVectorizer/X86/extractelement.ll
@@ -7,11 +7,10 @@
 
 define float @f(<2 x float> %x) {
 ; CHECK-LABEL: @f(
-; CHECK-NEXT:    [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
-; CHECK-NEXT:    [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1
-; CHECK-NEXT:    [[X0X0:%.*]] = fmul float [[X0]], [[X0]]
-; CHECK-NEXT:    [[X1X1:%.*]] = fmul float [[X1]], [[X1]]
-; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], [[X]]
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    ret float [[ADD]]
 ;
   %x0 = extractelement <2 x float> %x, i32 0
@@ -24,13 +23,13 @@
 
 define float @f_used_out_of_tree(<2 x float> %x) {
 ; THRESH2-LABEL: @f_used_out_of_tree(
-; THRESH2-NEXT:    [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
-; THRESH2-NEXT:    [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1
-; THRESH2-NEXT:    [[X0X0:%.*]] = fmul float [[X0]], [[X0]]
-; THRESH2-NEXT:    [[X1X1:%.*]] = fmul float [[X1]], [[X1]]
-; THRESH2-NEXT:    [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]]
+; THRESH2-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
+; THRESH2-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[X]], [[X]]
+; THRESH2-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
+; THRESH2-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
+; THRESH2-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
 ; THRESH2-NEXT:    store float [[ADD]], float* @a
-; THRESH2-NEXT:    ret float [[X0]]
+; THRESH2-NEXT:    ret float [[TMP1]]
 ;
   %x0 = extractelement <2 x float> %x, i32 0
   %x1 = extractelement <2 x float> %x, i32 1
@@ -43,12 +42,15 @@
 
 define float @f_used_twice_in_tree(<2 x float> %x) {
 ; THRESH1-LABEL: @f_used_twice_in_tree(
-; THRESH1-NEXT:    [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
-; THRESH1-NEXT:    [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1
-; THRESH1-NEXT:    [[X0X0:%.*]] = fmul float [[X0]], [[X1]]
-; THRESH1-NEXT:    [[X1X1:%.*]] = fmul float [[X1]], [[X1]]
-; THRESH1-NEXT:    [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]]
+; THRESH1-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
+; THRESH1-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
+; THRESH1-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1
+; THRESH1-NEXT:    [[TMP4:%.*]] = fmul <2 x float> [[X]], [[TMP3]]
+; THRESH1-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
+; THRESH1-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
+; THRESH1-NEXT:    [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]]
 ; THRESH1-NEXT:    ret float [[ADD]]
+;
   %x0 = extractelement <2 x float> %x, i32 0
   %x1 = extractelement <2 x float> %x, i32 1
   %x0x0 = fmul float %x0, %x1
Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1660,7 +1660,10 @@
         int DeadCost = 0;
         for (unsigned i = 0, e = VL.size(); i < e; ++i) {
           Instruction *E = cast<Instruction>(VL[i]);
-          if (E->hasOneUse())
+          if (E->hasOneUse() ||
+              std::all_of(E->user_begin(), E->user_end(), [this](User *U) {
+                return ScalarToTreeEntry.count(U) > 0;
+              }))
             // Take credit for instruction that will become dead.
             DeadCost +=
                 TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D29900.88357.patch
Type: text/x-patch
Size: 3972 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170214/e0bdc5b1/attachment.bin>


More information about the llvm-commits mailing list