[llvm] 5d13b12 - [SLP]Improve isUndefVector function by adding insertelement analysis.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 16 14:41:06 PDT 2022


Author: Alexey Bataev
Date: 2022-09-16T14:36:38-07:00
New Revision: 5d13b12674c620774c733205703da836cefc6782

URL: https://github.com/llvm/llvm-project/commit/5d13b12674c620774c733205703da836cefc6782
DIFF: https://github.com/llvm/llvm-project/commit/5d13b12674c620774c733205703da836cefc6782.diff

LOG: [SLP]Improve isUndefVector function by adding insertelement analysis.

Added the mask and the analysis of the buildvector sequence to the
isUndefVector function, which improves codegen and cost estimation.

Metric: SLP.NumVectorInstructions

Program                                                                                       SLP.NumVectorInstructions
                                                                          results                   results0 diff
test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test 27362.00                  27360.00 -0.0%

Metric: size..text

Program                                                                                                           size..text
                                                                   results     results0    diff
test-suite :: External/SPEC/CFP2017rate/508.namd_r/508.namd_r.test   805299.00   806035.00  0.1%

526.blender_r - some extra code is vectorized.
508.namd_r - some extra code is optimized out.

Differential Revision: https://reviews.llvm.org/D133891

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
    llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
    llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 39033fa1f8245..374e2c285682b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -319,20 +319,35 @@ static Optional<unsigned> getInsertIndex(const Value *InsertInst,
 }
 
 /// Checks if the given value is actually an undefined constant vector.
-static bool isUndefVector(const Value *V) {
+/// Also, if the \p ShuffleMask is not empty, tries to check if the non-masked
+/// elements actually mask the insertelement buildvector, if any.
+static bool isUndefVector(const Value *V, ArrayRef<int> ShuffleMask = None) {
   if (isa<UndefValue>(V))
     return true;
-  auto *C = dyn_cast<Constant>(V);
-  if (!C)
-    return false;
-  if (!C->containsUndefOrPoisonElement())
-    return false;
-  auto *VecTy = dyn_cast<FixedVectorType>(C->getType());
+  auto *VecTy = dyn_cast<FixedVectorType>(V->getType());
   if (!VecTy)
     return false;
+  auto *C = dyn_cast<Constant>(V);
+  if (!C) {
+    if (!ShuffleMask.empty()) {
+      const Value *Base = V;
+      while (auto *II = dyn_cast<InsertElementInst>(Base)) {
+        Base = II->getOperand(0);
+        Optional<unsigned> Idx = getInsertIndex(II);
+        if (!Idx)
+          continue;
+        if (*Idx < ShuffleMask.size() && ShuffleMask[*Idx] == UndefMaskElem)
+          return false;
+      }
+      return V != Base && isUndefVector(Base);
+    }
+    return false;
+  }
   for (unsigned I = 0, E = VecTy->getNumElements(); I != E; ++I) {
     if (Constant *Elem = C->getAggregateElement(I))
-      if (!isa<UndefValue>(Elem))
+      if (!isa<UndefValue>(Elem) &&
+          (ShuffleMask.empty() ||
+           (I < ShuffleMask.size() && ShuffleMask[I] == UndefMaskElem)))
         return false;
   }
   return true;
@@ -6360,8 +6375,10 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
       // initial vector or inserting a subvector.
       // TODO: Implement the analysis of the FirstInsert->getOperand(0)
       // subvector of ActualVecTy.
-      if (!isUndefVector(FirstInsert->getOperand(0)) && NumScalars != NumElts &&
-          !IsWholeSubvector) {
+      SmallVector<int> InsertMask(NumElts, UndefMaskElem);
+      copy(Mask, std::next(InsertMask.begin(), OffsetBeg));
+      if (!isUndefVector(FirstInsert->getOperand(0), InsertMask) &&
+          NumScalars != NumElts && !IsWholeSubvector) {
         if (InsertVecSz != VecSz) {
           auto *ActualVecTy =
               FixedVectorType::get(SrcVecTy->getElementType(), VecSz);
@@ -7056,7 +7073,7 @@ static T *performExtractsShuffleAction(
   SmallVector<int> Mask(ShuffleMask.begin()->second);
   auto VMIt = std::next(ShuffleMask.begin());
   T *Prev = nullptr;
-  bool IsBaseNotUndef = !isUndefVector(Base);
+  bool IsBaseNotUndef = !isUndefVector(Base, Mask);
   if (IsBaseNotUndef) {
     // Base is not undef, need to combine it with the next subvectors.
     std::pair<T *, bool> Res =
@@ -8106,14 +8123,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
         }
       }
 
-      if ((!IsIdentity || Offset != 0 ||
-           !isUndefVector(FirstInsert->getOperand(0))) &&
-          NumElts != NumScalars) {
-        SmallVector<int> InsertMask(NumElts);
-        std::iota(InsertMask.begin(), InsertMask.end(), 0);
+      SmallVector<int> InsertMask(NumElts, UndefMaskElem);
+      for (unsigned I = 0; I < NumElts; I++) {
+        if (Mask[I] != UndefMaskElem)
+          InsertMask[Offset + I] = NumElts + I;
+      }
+      if (Offset != 0 ||
+          !isUndefVector(FirstInsert->getOperand(0), InsertMask)) {
         for (unsigned I = 0; I < NumElts; I++) {
-          if (Mask[I] != UndefMaskElem)
-            InsertMask[Offset + I] = NumElts + I;
+          if (InsertMask[I] == UndefMaskElem)
+            InsertMask[I] = I;
         }
 
         V = Builder.CreateShuffleVector(
@@ -8792,8 +8811,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
           if (IsIdentityMask(Mask, cast<FixedVectorType>(SV->getType())) ||
               SV->isZeroEltSplat())
             break;
-          bool IsOp1Undef = isUndefVector(SV->getOperand(0));
-          bool IsOp2Undef = isUndefVector(SV->getOperand(1));
+          bool IsOp1Undef = isUndefVector(SV->getOperand(0), Mask);
+          bool IsOp2Undef = isUndefVector(SV->getOperand(1), Mask);
           if (!IsOp1Undef && !IsOp2Undef)
             break;
           SmallVector<int> ShuffleMask(SV->getShuffleMask().begin(),
@@ -8813,7 +8832,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
                           &CombineMasks](Value *V1, Value *V2,
                                          ArrayRef<int> Mask) -> Value * {
     assert(V1 && "Expected at least one vector value.");
-    if (V2 && !isUndefVector(V2)) {
+    if (V2 && !isUndefVector(V2, Mask)) {
       // Peek through shuffles.
       Value *Op1 = V1;
       Value *Op2 = V2;
@@ -8841,8 +8860,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
             if (SV1->getOperand(0)->getType() ==
                     SV2->getOperand(0)->getType() &&
                 SV1->getOperand(0)->getType() != SV1->getType() &&
-                isUndefVector(SV1->getOperand(1)) &&
-                isUndefVector(SV2->getOperand(1))) {
+                isUndefVector(SV1->getOperand(1), CombinedMask1) &&
+                isUndefVector(SV2->getOperand(1), CombinedMask2)) {
               Op1 = SV1->getOperand(0);
               Op2 = SV2->getOperand(0);
               SmallVector<int> ShuffleMask1(SV1->getShuffleMask().begin(),

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
index 281169f7a69fe..82a42a7444a21 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
@@ -12,7 +12,7 @@ define void @test() {
 ; CHECK-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
 ; CHECK-NEXT:    store <2 x float> zeroinitializer, ptr null, align 4
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> <i32 3, i32 1>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT:    store <2 x float> zeroinitializer, ptr null, align 4
 ; CHECK-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
index 36347b4843fe7..b9662c8b64bfb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
@@ -53,11 +53,9 @@ define { <2 x float>, <2 x float> } @test1(i32 %conv.i32.i.i.i) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x float> zeroinitializer, <4 x float> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = fadd <4 x float> [[TMP5]], zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP7]], <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP9]], <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[TMP8]], 0
-; CHECK-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[TMP10]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[TMP7]], 0
+; CHECK-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[TMP8]], 1
 ; CHECK-NEXT:    ret { <2 x float>, <2 x float> } zeroinitializer
 ;
 entry:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
index 09487d560de4a..5be933e0e4815 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -44,8 +44,7 @@ define <8 x float> @simple_select2(<4 x float> %a, <4 x float> %b, <4 x i32> %c)
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 undef, i32 3>
-; CHECK-NEXT:    [[RD1:%.*]] = shufflevector <8 x float> undef, <8 x float> [[TMP3]], <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 6, i32 15>
-; CHECK-NEXT:    ret <8 x float> [[RD1]]
+; CHECK-NEXT:    ret <8 x float> [[TMP3]]
 ;
   %c0 = extractelement <4 x i32> %c, i32 0
   %c1 = extractelement <4 x i32> %c, i32 1


        


More information about the llvm-commits mailing list