[llvm] 7567f5b - Revert "[SLP]Do extra analysis int minbitwidth if some checks return false."

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 15 03:53:09 PDT 2024


Author: Alexey Bataev
Date: 2024-03-15T03:52:58-07:00
New Revision: 7567f5ba789cce32a33e2661301c1b8bb629d47d

URL: https://github.com/llvm/llvm-project/commit/7567f5ba789cce32a33e2661301c1b8bb629d47d
DIFF: https://github.com/llvm/llvm-project/commit/7567f5ba789cce32a33e2661301c1b8bb629d47d.diff

LOG: Revert "[SLP]Do extra analysis int minbitwidth if some checks return false."

This reverts commit ea429e19f56005bf89e717c14efdf49ec055b183 to fix
issues reported in https://github.com/llvm/llvm-project/pull/84536#issuecomment-1999295445.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 57cb0b1a2e6d04..c95468d919da6b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10226,11 +10226,9 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
       for (const TreeEntry *TE : ForRemoval)
         Set.erase(TE);
     }
-    bool NeedToRemapValues = false;
     for (auto *It = UsedTEs.begin(); It != UsedTEs.end();) {
       if (It->empty()) {
         UsedTEs.erase(It);
-        NeedToRemapValues = true;
         continue;
       }
       std::advance(It, 1);
@@ -10239,19 +10237,6 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
       Entries.clear();
       return std::nullopt;
     }
-    // Recalculate the mapping between the values and entries sets.
-    if (NeedToRemapValues) {
-      DenseMap<Value *, int> PrevUsedValuesEntry;
-      PrevUsedValuesEntry.swap(UsedValuesEntry);
-      for (auto [Idx, Set] : enumerate(UsedTEs)) {
-        DenseSet<Value *> Values;
-        for (const TreeEntry *E : Set)
-          Values.insert(E->Scalars.begin(), E->Scalars.end());
-        for (const auto &P : PrevUsedValuesEntry)
-          if (Values.contains(P.first))
-            UsedValuesEntry.try_emplace(P.first, Idx);
-      }
-    }
   }
 
   unsigned VF = 0;
@@ -14022,33 +14007,6 @@ bool BoUpSLP::collectValuesToDemote(
   };
   unsigned Start = 0;
   unsigned End = I->getNumOperands();
-
-  auto FinalAnalysis = [&](const TreeEntry *ITE = nullptr) {
-    if (!IsProfitableToDemote)
-      return false;
-    return (ITE && ITE->UserTreeIndices.size() > 1) ||
-           IsPotentiallyTruncated(I, BitWidth);
-  };
-  auto ProcessOperands = [&](ArrayRef<Value *> Operands, bool &NeedToExit) {
-    NeedToExit = false;
-    unsigned InitLevel = MaxDepthLevel;
-    for (Value *IncValue : Operands) {
-      unsigned Level = InitLevel;
-      if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
-                                 ToDemote, DemotedConsts, Visited, Level,
-                                 IsProfitableToDemote, IsTruncRoot)) {
-        if (!IsProfitableToDemote)
-          return false;
-        NeedToExit = true;
-        if (!FinalAnalysis(ITE))
-          return false;
-        continue;
-      }
-      MaxDepthLevel = std::max(MaxDepthLevel, Level);
-    }
-    return true;
-  };
-  bool NeedToExit = false;
   switch (I->getOpcode()) {
 
   // We can always demote truncations and extensions. Since truncations can
@@ -14074,21 +14032,35 @@ bool BoUpSLP::collectValuesToDemote(
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor: {
-    if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
-      return false;
-    if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit))
+    unsigned Level1, Level2;
+    if ((ITE->UserTreeIndices.size() > 1 &&
+         !IsPotentiallyTruncated(I, BitWidth)) ||
+        !collectValuesToDemote(I->getOperand(0), IsProfitableToDemoteRoot,
+                               BitWidth, ToDemote, DemotedConsts, Visited,
+                               Level1, IsProfitableToDemote, IsTruncRoot) ||
+        !collectValuesToDemote(I->getOperand(1), IsProfitableToDemoteRoot,
+                               BitWidth, ToDemote, DemotedConsts, Visited,
+                               Level2, IsProfitableToDemote, IsTruncRoot))
       return false;
+    MaxDepthLevel = std::max(Level1, Level2);
     break;
   }
 
   // We can demote selects if we can demote their true and false values.
   case Instruction::Select: {
-    if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
-      return false;
     Start = 1;
-    auto *SI = cast<SelectInst>(I);
-    if (!ProcessOperands({SI->getTrueValue(), SI->getFalseValue()}, NeedToExit))
+    unsigned Level1, Level2;
+    SelectInst *SI = cast<SelectInst>(I);
+    if ((ITE->UserTreeIndices.size() > 1 &&
+         !IsPotentiallyTruncated(I, BitWidth)) ||
+        !collectValuesToDemote(SI->getTrueValue(), IsProfitableToDemoteRoot,
+                               BitWidth, ToDemote, DemotedConsts, Visited,
+                               Level1, IsProfitableToDemote, IsTruncRoot) ||
+        !collectValuesToDemote(SI->getFalseValue(), IsProfitableToDemoteRoot,
+                               BitWidth, ToDemote, DemotedConsts, Visited,
+                               Level2, IsProfitableToDemote, IsTruncRoot))
       return false;
+    MaxDepthLevel = std::max(Level1, Level2);
     break;
   }
 
@@ -14099,20 +14071,22 @@ bool BoUpSLP::collectValuesToDemote(
     MaxDepthLevel = 0;
     if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
       return false;
-    SmallVector<Value *> Ops(PN->incoming_values().begin(),
-                             PN->incoming_values().end());
-    if (!ProcessOperands(Ops, NeedToExit))
-      return false;
+    for (Value *IncValue : PN->incoming_values()) {
+      unsigned Level;
+      if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
+                                 ToDemote, DemotedConsts, Visited, Level,
+                                 IsProfitableToDemote, IsTruncRoot))
+        return false;
+      MaxDepthLevel = std::max(MaxDepthLevel, Level);
+    }
     break;
   }
 
   // Otherwise, conservatively give up.
   default:
     MaxDepthLevel = 1;
-    return FinalAnalysis();
+    return IsProfitableToDemote && IsPotentiallyTruncated(I, BitWidth);
   }
-  if (NeedToExit)
-    return true;
 
   ++MaxDepthLevel;
   // Gather demoted constant operands.
@@ -14151,7 +14125,6 @@ void BoUpSLP::computeMinimumValueSizes() {
 
   // The first value node for store/insertelement is sext/zext/trunc? Skip it,
   // resize to the final type.
-  bool IsTruncRoot = false;
   bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
   if (NodeIdx != 0 &&
       VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
@@ -14159,9 +14132,8 @@ void BoUpSLP::computeMinimumValueSizes() {
        VectorizableTree[NodeIdx]->getOpcode() == Instruction::SExt ||
        VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) {
     assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph.");
-    IsTruncRoot = VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc;
-    IsProfitableToDemoteRoot = true;
     ++NodeIdx;
+    IsProfitableToDemoteRoot = true;
   }
 
   // Analyzed in reduction already and not profitable - exit.
@@ -14293,6 +14265,7 @@ void BoUpSLP::computeMinimumValueSizes() {
     ReductionBitWidth = bit_ceil(ReductionBitWidth);
   }
   bool IsTopRoot = NodeIdx == 0;
+  bool IsTruncRoot = false;
   while (NodeIdx < VectorizableTree.size() &&
          VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
          VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
index 02d1f9f60d0ca1..1986b51ec94828 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
@@ -228,7 +228,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; YAML-NEXT: Function:        test_unrolled_select
 ; YAML-NEXT: Args:
 ; YAML-NEXT:   - String:          'Vectorized horizontal reduction with cost '
-; YAML-NEXT:   - Cost:            '-40'
+; YAML-NEXT:   - Cost:            '-36'
 ; YAML-NEXT:   - String:          ' and with tree size '
 ; YAML-NEXT:   - TreeSize:        '10'
 
@@ -246,17 +246,15 @@ define i32 @test_unrolled_select(ptr noalias nocapture readonly %blk1, ptr noali
 ; CHECK-NEXT:    [[P2_045:%.*]] = phi ptr [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR88:%.*]], [[IF_END_86]] ]
 ; CHECK-NEXT:    [[P1_044:%.*]] = phi ptr [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[IF_END_86]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[P1_044]], align 1
-; CHECK-NEXT:    [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16>
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P2_045]], align 1
-; CHECK-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
-; CHECK-NEXT:    [[TMP4:%.*]] = sub <8 x i16> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = trunc <8 x i16> [[TMP4]] to <8 x i1>
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt <8 x i1> [[TMP5]], zeroinitializer
-; CHECK-NEXT:    [[TMP7:%.*]] = sub <8 x i16> zeroinitializer, [[TMP4]]
-; CHECK-NEXT:    [[TMP8:%.*]] = select <8 x i1> [[TMP6]], <8 x i16> [[TMP7]], <8 x i16> [[TMP4]]
-; CHECK-NEXT:    [[TMP9:%.*]] = zext <8 x i16> [[TMP8]] to <8 x i32>
-; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
-; CHECK-NEXT:    [[OP_RDX]] = add i32 [[TMP10]], [[S_047]]
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt <8 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw <8 x i32> zeroinitializer, [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> [[TMP4]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP7]])
+; CHECK-NEXT:    [[OP_RDX]] = add i32 [[TMP8]], [[S_047]]
 ; CHECK-NEXT:    [[CMP83:%.*]] = icmp slt i32 [[OP_RDX]], [[LIM:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP83]], label [[IF_END_86]], label [[FOR_END_LOOPEXIT:%.*]]
 ; CHECK:       if.end.86:


        


More information about the llvm-commits mailing list