[llvm] 54ca1e2 - [SLP]Fix PR80027: include initial trunc nodes to the demoted values.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 26 06:41:17 PDT 2024


Author: Alexey Bataev
Date: 2024-03-26T06:40:57-07:00
New Revision: 54ca1e2c041a3780eca83549d7f7137581d32abb

URL: https://github.com/llvm/llvm-project/commit/54ca1e2c041a3780eca83549d7f7137581d32abb
DIFF: https://github.com/llvm/llvm-project/commit/54ca1e2c041a3780eca83549d7f7137581d32abb.diff

LOG: [SLP]Fix PR80027: include initial trunc nodes to the demoted values.

The initial sext/zext/trunc nodes need to be included in the list of
demoted root values so that the cost is calculated correctly and the
vectorization is handled properly.

Added: 
    llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f98d15c285a693..f7cb06ad3e961a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -14251,6 +14251,7 @@ void BoUpSLP::computeMinimumValueSizes() {
   // resize to the final type.
   bool IsTruncRoot = false;
   bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
+  SmallVector<unsigned> RootDemotes;
   if (NodeIdx != 0 &&
       VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
       (VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt ||
@@ -14258,6 +14259,7 @@ void BoUpSLP::computeMinimumValueSizes() {
        VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) {
     assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph.");
     IsTruncRoot = VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc;
+    RootDemotes.push_back(NodeIdx);
     IsProfitableToDemoteRoot = true;
     ++NodeIdx;
   }
@@ -14394,6 +14396,7 @@ void BoUpSLP::computeMinimumValueSizes() {
   while (NodeIdx < VectorizableTree.size() &&
          VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
          VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {
+    RootDemotes.push_back(NodeIdx);
     ++NodeIdx;
     IsTruncRoot = true;
   }
@@ -14409,14 +14412,22 @@ void BoUpSLP::computeMinimumValueSizes() {
     unsigned MaxBitWidth = ComputeMaxBitWidth(
         TreeRoot, VectorizableTree[NodeIdx]->getVectorFactor(), IsTopRoot,
         IsProfitableToDemoteRoot, Opcode, Limit, IsTruncRoot);
+    for (unsigned Idx : RootDemotes)
+      ToDemote.append(VectorizableTree[Idx]->Scalars.begin(),
+                      VectorizableTree[Idx]->Scalars.end());
+    RootDemotes.clear();
     IsTopRoot = false;
     IsProfitableToDemoteRoot = true;
 
     if (TruncNodes.empty()) {
       NodeIdx = VectorizableTree.size();
     } else {
-      NodeIdx = *TruncNodes.begin() + 1;
-      TruncNodes.erase(TruncNodes.begin());
+      unsigned NewIdx = 0;
+      do {
+        NewIdx = *TruncNodes.begin() + 1;
+        TruncNodes.erase(TruncNodes.begin());
+      } while (NewIdx <= NodeIdx && !TruncNodes.empty());
+      NodeIdx = NewIdx;
       IsTruncRoot = true;
     }
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll
index 31f16801b7a64e..6388cc2dedc73a 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll
@@ -7,17 +7,9 @@ define void @partial_vec_invalid_cost() #0 {
 ; CHECK-LABEL: define void @partial_vec_invalid_cost(
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[LSHR_1:%.*]] = lshr i96 0, 0
-; CHECK-NEXT:    [[LSHR_2:%.*]] = lshr i96 0, 0
-; CHECK-NEXT:    [[TRUNC_I96_1:%.*]] = trunc i96 [[LSHR_1]] to i32
-; CHECK-NEXT:    [[TRUNC_I96_2:%.*]] = trunc i96 [[LSHR_2]] to i32
-; CHECK-NEXT:    [[TRUNC_I96_3:%.*]] = trunc i96 0 to i32
-; CHECK-NEXT:    [[TRUNC_I96_4:%.*]] = trunc i96 0 to i32
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[OP_RDX:%.*]] = or i32 [[TMP0]], [[TRUNC_I96_2]]
-; CHECK-NEXT:    [[OP_RDX1:%.*]] = or i32 [[TRUNC_I96_1]], [[TRUNC_I96_3]]
-; CHECK-NEXT:    [[OP_RDX2:%.*]] = or i32 [[OP_RDX]], [[OP_RDX1]]
-; CHECK-NEXT:    [[OP_RDX3:%.*]] = or i32 [[OP_RDX2]], [[TRUNC_I96_4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[OP_RDX3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[STORE_THIS:%.*]] = zext i32 [[OP_RDX3]] to i96
 ; CHECK-NEXT:    store i96 [[STORE_THIS]], ptr null, align 16
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll
new file mode 100644
index 00000000000000..cfe3ca9f8f9e5f
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S --passes=slp-vectorizer -mtriple=systemz -mcpu=z15 %s | FileCheck %s
+
+define void @test(ptr %a, i8 %0, i16 %b.promoted.i) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[A:%.*]], i8 [[TMP0:%.*]], i16 [[B_PROMOTED_I:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP0]] to i128
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[B_PROMOTED_I]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i128> poison, i128 [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i128> [[TMP5]], <4 x i128> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = trunc <4 x i128> [[TMP6]] to <4 x i16>
+; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i16> [[TMP4]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = trunc <4 x i16> [[TMP8]] to <4 x i1>
+; CHECK-NEXT:    [[TMP10:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP9]])
+; CHECK-NEXT:    [[TMP11:%.*]] = zext i1 [[TMP10]] to i64
+; CHECK-NEXT:    [[OP_RDX:%.*]] = and i64 [[TMP11]], 1
+; CHECK-NEXT:    store i64 [[OP_RDX]], ptr [[A]], align 8
+; CHECK-NEXT:    ret void
+;
+  %2 = zext i8 %0 to i128
+  %3 = zext i16 %b.promoted.i to i128
+  %4 = or i128 %3, %2
+  %5 = trunc i128 %4 to i64
+  %6 = and i64 %5, 1
+  %7 = zext i16 %b.promoted.i to i128
+  %8 = or i128 %7, %2
+  %9 = trunc i128 %8 to i64
+  %10 = and i64 %6, %9
+  %11 = zext i16 %b.promoted.i to i128
+  %12 = or i128 %11, %2
+  %13 = trunc i128 %12 to i64
+  %14 = and i64 %10, %13
+  %15 = zext i16 %b.promoted.i to i128
+  %16 = or i128 %15, %2
+  %17 = trunc i128 %16 to i64
+  %18 = and i64 %14, %17
+  store i64 %18, ptr %a, align 8
+  ret void
+}
+


        


More information about the llvm-commits mailing list