[llvm] d1a7225 - [SLP]Check if the node must keep its original bitwidth

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 16 08:04:17 PST 2024


Author: Alexey Bataev
Date: 2024-12-16T08:01:22-08:00
New Revision: d1a7225076218ce224cd29c74259b715b393dc9d

URL: https://github.com/llvm/llvm-project/commit/d1a7225076218ce224cd29c74259b715b393dc9d
DIFF: https://github.com/llvm/llvm-project/commit/d1a7225076218ce224cd29c74259b715b393dc9d.diff

LOG: [SLP]Check if the node must keep its original bitwidth

Need to check whether a previous analysis has already determined that the
node must keep its original bitwidth, to avoid incorrect codegen.

Fixes #120076

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3bd983ee6e125d..d967813075bb9f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2931,13 +2931,11 @@ class BoUpSLP {
   /// truncation. We collect the entries that will be demoted in ToDemote.
   /// \param E Node for analysis
   /// \param ToDemote indices of the nodes to be demoted.
-  bool collectValuesToDemote(const TreeEntry &E, bool IsProfitableToDemoteRoot,
-                             unsigned &BitWidth,
-                             SmallVectorImpl<unsigned> &ToDemote,
-                             DenseSet<const TreeEntry *> &Visited,
-                             unsigned &MaxDepthLevel,
-                             bool &IsProfitableToDemote,
-                             bool IsTruncRoot) const;
+  bool collectValuesToDemote(
+      const TreeEntry &E, bool IsProfitableToDemoteRoot, unsigned &BitWidth,
+      SmallVectorImpl<unsigned> &ToDemote, DenseSet<const TreeEntry *> &Visited,
+      const SmallDenseSet<unsigned, 8> &NodesToKeepBWs, unsigned &MaxDepthLevel,
+      bool &IsProfitableToDemote, bool IsTruncRoot) const;
 
   /// Check if the operands on the edges \p Edges of the \p UserTE allows
   /// reordering (i.e. the operands can be reordered because they have only one
@@ -17515,8 +17513,8 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
 bool BoUpSLP::collectValuesToDemote(
     const TreeEntry &E, bool IsProfitableToDemoteRoot, unsigned &BitWidth,
     SmallVectorImpl<unsigned> &ToDemote, DenseSet<const TreeEntry *> &Visited,
-    unsigned &MaxDepthLevel, bool &IsProfitableToDemote,
-    bool IsTruncRoot) const {
+    const SmallDenseSet<unsigned, 8> &NodesToKeepBWs, unsigned &MaxDepthLevel,
+    bool &IsProfitableToDemote, bool IsTruncRoot) const {
   // We can always demote constants.
   if (all_of(E.Scalars, IsaPred<Constant>))
     return true;
@@ -17528,6 +17526,10 @@ bool BoUpSLP::collectValuesToDemote(
     return true;
   }
 
+  // Check if the node was analyzed already and must keep its original bitwidth.
+  if (NodesToKeepBWs.contains(E.Idx))
+    return false;
+
   // If the value is not a vectorized instruction in the expression and not used
   // by the insertelement instruction and not used in multiple vector nodes, it
   // cannot be demoted.
@@ -17623,8 +17625,8 @@ bool BoUpSLP::collectValuesToDemote(
     for (const TreeEntry *Op : Operands) {
       unsigned Level = InitLevel;
       if (!collectValuesToDemote(*Op, IsProfitableToDemoteRoot, BitWidth,
-                                 ToDemote, Visited, Level, IsProfitableToDemote,
-                                 IsTruncRoot)) {
+                                 ToDemote, Visited, NodesToKeepBWs, Level,
+                                 IsProfitableToDemote, IsTruncRoot)) {
         if (!IsProfitableToDemote)
           return false;
         NeedToExit = true;
@@ -17926,6 +17928,7 @@ void BoUpSLP::computeMinimumValueSizes() {
   bool IsTruncRoot = false;
   bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
   SmallVector<unsigned> RootDemotes;
+  SmallDenseSet<unsigned, 8> NodesToKeepBWs;
   if (NodeIdx != 0 &&
       VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
       VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {
@@ -17949,6 +17952,7 @@ void BoUpSLP::computeMinimumValueSizes() {
     // Check if the root is trunc and the next node is gather/buildvector, then
     // keep trunc in scalars, which is free in most cases.
     if (E.isGather() && IsTruncRoot && E.UserTreeIndices.size() == 1 &&
+        !NodesToKeepBWs.contains(E.Idx) &&
         E.Idx > (IsStoreOrInsertElt ? 2u : 1u) &&
         all_of(E.Scalars, [&](Value *V) {
           return V->hasOneUse() || isa<Constant>(V) ||
@@ -18071,8 +18075,8 @@ void BoUpSLP::computeMinimumValueSizes() {
     bool NeedToDemote = IsProfitableToDemote;
 
     if (!collectValuesToDemote(E, IsProfitableToDemoteRoot, MaxBitWidth,
-                               ToDemote, Visited, MaxDepthLevel, NeedToDemote,
-                               IsTruncRoot) ||
+                               ToDemote, Visited, NodesToKeepBWs, MaxDepthLevel,
+                               NeedToDemote, IsTruncRoot) ||
         (MaxDepthLevel <= Limit &&
          !(((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
             (!IsTopRoot || !(IsStoreOrInsertElt || UserIgnoreList) ||
@@ -18206,7 +18210,7 @@ void BoUpSLP::computeMinimumValueSizes() {
                  });
     }
 
-    // If the maximum bit width we compute is less than the with of the roots'
+    // If the maximum bit width we compute is less than the width of the roots'
     // type, we can proceed with the narrowing. Otherwise, do nothing.
     if (MaxBitWidth == 0 ||
         MaxBitWidth >=
@@ -18214,6 +18218,7 @@ void BoUpSLP::computeMinimumValueSizes() {
                 ->getBitWidth()) {
       if (UserIgnoreList)
         AnalyzedMinBWVals.insert(TreeRoot.begin(), TreeRoot.end());
+      NodesToKeepBWs.insert(ToDemote.begin(), ToDemote.end());
       continue;
     }
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll
index afc38bdf00c432..d3d7f21ee1003c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll
@@ -6,10 +6,11 @@ define i8 @test() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[SUB_I_I79_PEEL_I:%.*]] = sub i16 0, 1
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i16> <i16 poison, i16 0>, i16 [[SUB_I_I79_PEEL_I]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i16> zeroinitializer, [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i16>
-; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i16> [[TMP2]], [[TMP0]]
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <2 x i16> [[TMP3]], [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i32> zeroinitializer, [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i32> [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <2 x i32> [[TMP4]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[CONV13_I89_PEEL_I:%.*]] = zext i1 [[TMP5]] to i8
 ; CHECK-NEXT:    ret i8 [[CONV13_I89_PEEL_I]]


        


More information about the llvm-commits mailing list