[llvm] 5d13b12 - [SLP]Improve isUndefVector function by adding insertelement analysis.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 16 14:41:06 PDT 2022
Author: Alexey Bataev
Date: 2022-09-16T14:36:38-07:00
New Revision: 5d13b12674c620774c733205703da836cefc6782
URL: https://github.com/llvm/llvm-project/commit/5d13b12674c620774c733205703da836cefc6782
DIFF: https://github.com/llvm/llvm-project/commit/5d13b12674c620774c733205703da836cefc6782.diff
LOG: [SLP]Improve isUndefVector function by adding insertelement analysis.
Added a shuffle-mask parameter and analysis of the insertelement buildvector
sequence to the isUndefVector function. This improves codegen and cost estimation.
Metric: SLP.NumVectorInstructions
Program                                                                   results    results0   diff
test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test  27362.00   27360.00   -0.0%
Metric: size..text
Program                                                                   results    results0   diff
test-suite :: External/SPEC/CFP2017rate/508.namd_r/508.namd_r.test        805299.00  806035.00  0.1%
526.blender_r - some extra code is vectorized.
508.namd_r - some extra code is optimized out.
Differential Revision: https://reviews.llvm.org/D133891
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 39033fa1f8245..374e2c285682b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -319,20 +319,35 @@ static Optional<unsigned> getInsertIndex(const Value *InsertInst,
}
/// Checks if the given value is actually an undefined constant vector.
-static bool isUndefVector(const Value *V) {
+/// Also, if the \p ShuffleMask is not empty, tries to check if the non-masked
+/// elements actually mask the insertelement buildvector, if any.
+static bool isUndefVector(const Value *V, ArrayRef<int> ShuffleMask = None) {
if (isa<UndefValue>(V))
return true;
- auto *C = dyn_cast<Constant>(V);
- if (!C)
- return false;
- if (!C->containsUndefOrPoisonElement())
- return false;
- auto *VecTy = dyn_cast<FixedVectorType>(C->getType());
+ auto *VecTy = dyn_cast<FixedVectorType>(V->getType());
if (!VecTy)
return false;
+ auto *C = dyn_cast<Constant>(V);
+ if (!C) {
+ if (!ShuffleMask.empty()) {
+ const Value *Base = V;
+ while (auto *II = dyn_cast<InsertElementInst>(Base)) {
+ Base = II->getOperand(0);
+ Optional<unsigned> Idx = getInsertIndex(II);
+ if (!Idx)
+ continue;
+ if (*Idx < ShuffleMask.size() && ShuffleMask[*Idx] == UndefMaskElem)
+ return false;
+ }
+ return V != Base && isUndefVector(Base);
+ }
+ return false;
+ }
for (unsigned I = 0, E = VecTy->getNumElements(); I != E; ++I) {
if (Constant *Elem = C->getAggregateElement(I))
- if (!isa<UndefValue>(Elem))
+ if (!isa<UndefValue>(Elem) &&
+ (ShuffleMask.empty() ||
+ (I < ShuffleMask.size() && ShuffleMask[I] == UndefMaskElem)))
return false;
}
return true;
@@ -6360,8 +6375,10 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
// initial vector or inserting a subvector.
// TODO: Implement the analysis of the FirstInsert->getOperand(0)
// subvector of ActualVecTy.
- if (!isUndefVector(FirstInsert->getOperand(0)) && NumScalars != NumElts &&
- !IsWholeSubvector) {
+ SmallVector<int> InsertMask(NumElts, UndefMaskElem);
+ copy(Mask, std::next(InsertMask.begin(), OffsetBeg));
+ if (!isUndefVector(FirstInsert->getOperand(0), InsertMask) &&
+ NumScalars != NumElts && !IsWholeSubvector) {
if (InsertVecSz != VecSz) {
auto *ActualVecTy =
FixedVectorType::get(SrcVecTy->getElementType(), VecSz);
@@ -7056,7 +7073,7 @@ static T *performExtractsShuffleAction(
SmallVector<int> Mask(ShuffleMask.begin()->second);
auto VMIt = std::next(ShuffleMask.begin());
T *Prev = nullptr;
- bool IsBaseNotUndef = !isUndefVector(Base);
+ bool IsBaseNotUndef = !isUndefVector(Base, Mask);
if (IsBaseNotUndef) {
// Base is not undef, need to combine it with the next subvectors.
std::pair<T *, bool> Res =
@@ -8106,14 +8123,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
}
- if ((!IsIdentity || Offset != 0 ||
- !isUndefVector(FirstInsert->getOperand(0))) &&
- NumElts != NumScalars) {
- SmallVector<int> InsertMask(NumElts);
- std::iota(InsertMask.begin(), InsertMask.end(), 0);
+ SmallVector<int> InsertMask(NumElts, UndefMaskElem);
+ for (unsigned I = 0; I < NumElts; I++) {
+ if (Mask[I] != UndefMaskElem)
+ InsertMask[Offset + I] = NumElts + I;
+ }
+ if (Offset != 0 ||
+ !isUndefVector(FirstInsert->getOperand(0), InsertMask)) {
for (unsigned I = 0; I < NumElts; I++) {
- if (Mask[I] != UndefMaskElem)
- InsertMask[Offset + I] = NumElts + I;
+ if (InsertMask[I] == UndefMaskElem)
+ InsertMask[I] = I;
}
V = Builder.CreateShuffleVector(
@@ -8792,8 +8811,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
if (IsIdentityMask(Mask, cast<FixedVectorType>(SV->getType())) ||
SV->isZeroEltSplat())
break;
- bool IsOp1Undef = isUndefVector(SV->getOperand(0));
- bool IsOp2Undef = isUndefVector(SV->getOperand(1));
+ bool IsOp1Undef = isUndefVector(SV->getOperand(0), Mask);
+ bool IsOp2Undef = isUndefVector(SV->getOperand(1), Mask);
if (!IsOp1Undef && !IsOp2Undef)
break;
SmallVector<int> ShuffleMask(SV->getShuffleMask().begin(),
@@ -8813,7 +8832,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
&CombineMasks](Value *V1, Value *V2,
ArrayRef<int> Mask) -> Value * {
assert(V1 && "Expected at least one vector value.");
- if (V2 && !isUndefVector(V2)) {
+ if (V2 && !isUndefVector(V2, Mask)) {
// Peek through shuffles.
Value *Op1 = V1;
Value *Op2 = V2;
@@ -8841,8 +8860,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
if (SV1->getOperand(0)->getType() ==
SV2->getOperand(0)->getType() &&
SV1->getOperand(0)->getType() != SV1->getType() &&
- isUndefVector(SV1->getOperand(1)) &&
- isUndefVector(SV2->getOperand(1))) {
+ isUndefVector(SV1->getOperand(1), CombinedMask1) &&
+ isUndefVector(SV2->getOperand(1), CombinedMask2)) {
Op1 = SV1->getOperand(0);
Op2 = SV2->getOperand(0);
SmallVector<int> ShuffleMask1(SV1->getShuffleMask().begin(),
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
index 281169f7a69fe..82a42a7444a21 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
@@ -12,7 +12,7 @@ define void @test() {
; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> <i32 3, i32 1>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
index 36347b4843fe7..b9662c8b64bfb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
@@ -53,11 +53,9 @@ define { <2 x float>, <2 x float> } @test1(i32 %conv.i32.i.i.i) {
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x float> zeroinitializer, <4 x float> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP7]], <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP9]], <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[TMP8]], 0
-; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[TMP10]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[TMP7]], 0
+; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[TMP8]], 1
; CHECK-NEXT: ret { <2 x float>, <2 x float> } zeroinitializer
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
index 09487d560de4a..5be933e0e4815 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -44,8 +44,7 @@ define <8 x float> @simple_select2(<4 x float> %a, <4 x float> %b, <4 x i32> %c)
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 undef, i32 3>
-; CHECK-NEXT: [[RD1:%.*]] = shufflevector <8 x float> undef, <8 x float> [[TMP3]], <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 6, i32 15>
-; CHECK-NEXT: ret <8 x float> [[RD1]]
+; CHECK-NEXT: ret <8 x float> [[TMP3]]
;
%c0 = extractelement <4 x i32> %c, i32 0
%c1 = extractelement <4 x i32> %c, i32 1
More information about the llvm-commits mailing list