[llvm] 8af4723 - [SLP]Try to vectorize tiny trees with shuffled gathers.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 28 06:43:17 PDT 2021


Author: Alexey Bataev
Date: 2021-04-28T06:35:31-07:00
New Revision: 8af4723c5819e1c20c423987cd560d6f981b3d5c

URL: https://github.com/llvm/llvm-project/commit/8af4723c5819e1c20c423987cd560d6f981b3d5c
DIFF: https://github.com/llvm/llvm-project/commit/8af4723c5819e1c20c423987cd560d6f981b3d5c.diff

LOG: [SLP]Try to vectorize tiny trees with shuffled gathers.

If the first tree element is vectorize and the second is gather, it
still might be profitable to vectorize it if the gather node contains
less scalars to vectorize than the original tree node. It might be
profitable to use shuffles.

Differential Revision: https://reviews.llvm.org/D101397

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 900df48c80d8b..5a88b5c223d79 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4032,10 +4032,15 @@ bool BoUpSLP::isFullyVectorizableTinyTree() const {
   if (VectorizableTree.size() != 2)
     return false;
 
-  // Handle splat and all-constants stores.
+  // Handle splat and all-constants stores. Also try to vectorize tiny trees
+  // with the second gather nodes if they have less scalar operands rather than
+  // the initial tree element (may be profitable to shuffle the second gather).
   if (VectorizableTree[0]->State == TreeEntry::Vectorize &&
       (allConstant(VectorizableTree[1]->Scalars) ||
-       isSplat(VectorizableTree[1]->Scalars)))
+       isSplat(VectorizableTree[1]->Scalars) ||
+       (VectorizableTree[1]->State == TreeEntry::NeedToGather &&
+        VectorizableTree[1]->Scalars.size() <
+            VectorizableTree[0]->Scalars.size())))
     return true;
 
   // Gathering cost would be too much for tiny trees.

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
index 65f02770ab853..5548a828b778a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
@@ -272,21 +272,18 @@ define void @tiny_vector_gather(i32 *%a, i32 *%v1, i32 *%v2) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[V1:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[V2:%.*]], align 4
 ; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[PTR0]], align 16
 ; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1
-; CHECK-NEXT:    store i32 [[TMP2]], i32* [[PTR1]], align 4
 ; CHECK-NEXT:    [[PTR2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[PTR2]], align 8
 ; CHECK-NEXT:    [[PTR3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
-; CHECK-NEXT:    store i32 [[TMP2]], i32* [[PTR3]], align 4
 ; CHECK-NEXT:    [[PTR4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[PTR4]], align 16
 ; CHECK-NEXT:    [[PTR5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 5
-; CHECK-NEXT:    store i32 [[TMP2]], i32* [[PTR5]], align 4
 ; CHECK-NEXT:    [[PTR6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 6
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[PTR6]], align 8
 ; CHECK-NEXT:    [[PTR7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 7
-; CHECK-NEXT:    store i32 [[TMP2]], i32* [[PTR7]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP2]], i32 1
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[PTR0]] to <8 x i32>*
+; CHECK-NEXT:    store <8 x i32> [[SHUFFLE]], <8 x i32>* [[TMP5]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %1 = load i32, i32* %v1, align 4


        


More information about the llvm-commits mailing list