[PATCH] D101460: [SLP]Try to vectorize tiny trees with shuffled gathers of extractelements.
Alexey Bataev via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu May 20 08:38:15 PDT 2021
This revision was automatically updated to reflect the committed changes.
Closed by commit rG182162b61629: [SLP]Try to vectorize tiny trees with shuffled gathers of extractelements. (authored by ABataev).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D101460/new/
https://reviews.llvm.org/D101460
Files:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
Index: llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
===================================================================
--- llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
+++ llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
@@ -12,21 +12,18 @@
; CHECK-NEXT: [[Z0:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[B:%.*]] to <4 x i32>
; CHECK-NEXT: [[SUB0:%.*]] = sub <4 x i32> [[Z0]], [[Z1]]
-; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x i32> [[SUB0]], i32 0
-; CHECK-NEXT: [[S0:%.*]] = sext i32 [[E0]] to i64
-; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 [[S0]]
+; CHECK-NEXT: [[TMP0:%.*]] = sext <4 x i32> [[SUB0]] to <4 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
+; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 [[TMP1]]
; CHECK-NEXT: [[LOAD0:%.*]] = load i64, i64* [[GEP0]], align 4
-; CHECK-NEXT: [[E1:%.*]] = extractelement <4 x i32> [[SUB0]], i32 1
-; CHECK-NEXT: [[S1:%.*]] = sext i32 [[E1]] to i64
-; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[S1]]
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[TMP0]], i32 1
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[TMP2]]
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[GEP1]], align 4
-; CHECK-NEXT: [[E2:%.*]] = extractelement <4 x i32> [[SUB0]], i32 2
-; CHECK-NEXT: [[S2:%.*]] = sext i32 [[E2]] to i64
-; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[S2]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP0]], i32 2
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[TMP3]]
; CHECK-NEXT: [[LOAD2:%.*]] = load i64, i64* [[GEP2]], align 4
-; CHECK-NEXT: [[E3:%.*]] = extractelement <4 x i32> [[SUB0]], i32 3
-; CHECK-NEXT: [[S3:%.*]] = sext i32 [[E3]] to i64
-; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[S3]]
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP0]], i32 3
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[TMP4]]
; CHECK-NEXT: [[LOAD3:%.*]] = load i64, i64* [[GEP3]], align 4
; CHECK-NEXT: call void @foo(i64 [[LOAD0]], i64 [[LOAD1]], i64 [[LOAD2]], i64 [[LOAD3]])
; CHECK-NEXT: ret void
Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4140,13 +4140,18 @@
// Handle splat and all-constants stores. Also try to vectorize tiny trees
// with the second gather nodes if they have less scalar operands rather than
- // the initial tree element (may be profitable to shuffle the second gather).
+ // the initial tree element (may be profitable to shuffle the second gather)
+ // or they are extractelements, which form shuffle.
+ SmallVector<int> Mask;
if (VectorizableTree[0]->State == TreeEntry::Vectorize &&
(allConstant(VectorizableTree[1]->Scalars) ||
isSplat(VectorizableTree[1]->Scalars) ||
(VectorizableTree[1]->State == TreeEntry::NeedToGather &&
VectorizableTree[1]->Scalars.size() <
- VectorizableTree[0]->Scalars.size())))
+ VectorizableTree[0]->Scalars.size()) ||
+ (VectorizableTree[1]->State == TreeEntry::NeedToGather &&
+ VectorizableTree[1]->getOpcode() == Instruction::ExtractElement &&
+ isShuffle(VectorizableTree[1]->Scalars, Mask))))
return true;
// Gathering cost would be too much for tiny trees.
@@ -6088,6 +6093,9 @@
break;
case Instruction::ZExt:
case Instruction::SExt:
+ if (isa<ExtractElementInst>(I->getOperand(0)) ||
+ isa<InsertElementInst>(I->getOperand(0)))
+ return false;
break;
// We can demote certain binary operations if we can demote both of their
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D101460.346754.patch
Type: text/x-patch
Size: 4032 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210520/ee2d9930/attachment-0001.bin>
More information about the llvm-commits mailing list