[llvm] 54ca1e2 - [SLP]Fix PR80027: include initial trunc nodes to the demoted values.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 26 06:41:17 PDT 2024
Author: Alexey Bataev
Date: 2024-03-26T06:40:57-07:00
New Revision: 54ca1e2c041a3780eca83549d7f7137581d32abb
URL: https://github.com/llvm/llvm-project/commit/54ca1e2c041a3780eca83549d7f7137581d32abb
DIFF: https://github.com/llvm/llvm-project/commit/54ca1e2c041a3780eca83549d7f7137581d32abb.diff
LOG: [SLP]Fix PR80027: include initial trunc nodes to the demoted values.
The initial sext/zext/trunc nodes need to be included in the list of demoted
root values so that the cost is calculated correctly and the vectorization is
handled properly.
Added:
llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f98d15c285a693..f7cb06ad3e961a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -14251,6 +14251,7 @@ void BoUpSLP::computeMinimumValueSizes() {
// resize to the final type.
bool IsTruncRoot = false;
bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
+ SmallVector<unsigned> RootDemotes;
if (NodeIdx != 0 &&
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
(VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt ||
@@ -14258,6 +14259,7 @@ void BoUpSLP::computeMinimumValueSizes() {
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) {
assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph.");
IsTruncRoot = VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc;
+ RootDemotes.push_back(NodeIdx);
IsProfitableToDemoteRoot = true;
++NodeIdx;
}
@@ -14394,6 +14396,7 @@ void BoUpSLP::computeMinimumValueSizes() {
while (NodeIdx < VectorizableTree.size() &&
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {
+ RootDemotes.push_back(NodeIdx);
++NodeIdx;
IsTruncRoot = true;
}
@@ -14409,14 +14412,22 @@ void BoUpSLP::computeMinimumValueSizes() {
unsigned MaxBitWidth = ComputeMaxBitWidth(
TreeRoot, VectorizableTree[NodeIdx]->getVectorFactor(), IsTopRoot,
IsProfitableToDemoteRoot, Opcode, Limit, IsTruncRoot);
+ for (unsigned Idx : RootDemotes)
+ ToDemote.append(VectorizableTree[Idx]->Scalars.begin(),
+ VectorizableTree[Idx]->Scalars.end());
+ RootDemotes.clear();
IsTopRoot = false;
IsProfitableToDemoteRoot = true;
if (TruncNodes.empty()) {
NodeIdx = VectorizableTree.size();
} else {
- NodeIdx = *TruncNodes.begin() + 1;
- TruncNodes.erase(TruncNodes.begin());
+ unsigned NewIdx = 0;
+ do {
+ NewIdx = *TruncNodes.begin() + 1;
+ TruncNodes.erase(TruncNodes.begin());
+ } while (NewIdx <= NodeIdx && !TruncNodes.empty());
+ NodeIdx = NewIdx;
IsTruncRoot = true;
}
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll
index 31f16801b7a64e..6388cc2dedc73a 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll
@@ -7,17 +7,9 @@ define void @partial_vec_invalid_cost() #0 {
; CHECK-LABEL: define void @partial_vec_invalid_cost(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[LSHR_1:%.*]] = lshr i96 0, 0
-; CHECK-NEXT: [[LSHR_2:%.*]] = lshr i96 0, 0
-; CHECK-NEXT: [[TRUNC_I96_1:%.*]] = trunc i96 [[LSHR_1]] to i32
-; CHECK-NEXT: [[TRUNC_I96_2:%.*]] = trunc i96 [[LSHR_2]] to i32
-; CHECK-NEXT: [[TRUNC_I96_3:%.*]] = trunc i96 0 to i32
-; CHECK-NEXT: [[TRUNC_I96_4:%.*]] = trunc i96 0 to i32
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> zeroinitializer)
-; CHECK-NEXT: [[OP_RDX:%.*]] = or i32 [[TMP0]], [[TRUNC_I96_2]]
-; CHECK-NEXT: [[OP_RDX1:%.*]] = or i32 [[TRUNC_I96_1]], [[TRUNC_I96_3]]
-; CHECK-NEXT: [[OP_RDX2:%.*]] = or i32 [[OP_RDX]], [[OP_RDX1]]
-; CHECK-NEXT: [[OP_RDX3:%.*]] = or i32 [[OP_RDX2]], [[TRUNC_I96_4]]
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> zeroinitializer)
+; CHECK-NEXT: [[OP_RDX3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[STORE_THIS:%.*]] = zext i32 [[OP_RDX3]] to i96
; CHECK-NEXT: store i96 [[STORE_THIS]], ptr null, align 16
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll
new file mode 100644
index 00000000000000..cfe3ca9f8f9e5f
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S --passes=slp-vectorizer -mtriple=systemz -mcpu=z15 %s | FileCheck %s
+
+define void @test(ptr %a, i8 %0, i16 %b.promoted.i) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[A:%.*]], i8 [[TMP0:%.*]], i16 [[B_PROMOTED_I:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP0]] to i128
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[B_PROMOTED_I]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i128> poison, i128 [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i128> [[TMP5]], <4 x i128> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i128> [[TMP6]] to <4 x i16>
+; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i16> [[TMP4]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i16> [[TMP8]] to <4 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP9]])
+; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i64
+; CHECK-NEXT: [[OP_RDX:%.*]] = and i64 [[TMP11]], 1
+; CHECK-NEXT: store i64 [[OP_RDX]], ptr [[A]], align 8
+; CHECK-NEXT: ret void
+;
+ %2 = zext i8 %0 to i128
+ %3 = zext i16 %b.promoted.i to i128
+ %4 = or i128 %3, %2
+ %5 = trunc i128 %4 to i64
+ %6 = and i64 %5, 1
+ %7 = zext i16 %b.promoted.i to i128
+ %8 = or i128 %7, %2
+ %9 = trunc i128 %8 to i64
+ %10 = and i64 %6, %9
+ %11 = zext i16 %b.promoted.i to i128
+ %12 = or i128 %11, %2
+ %13 = trunc i128 %12 to i64
+ %14 = and i64 %10, %13
+ %15 = zext i16 %b.promoted.i to i128
+ %16 = or i128 %15, %2
+ %17 = trunc i128 %16 to i64
+ %18 = and i64 %14, %17
+ store i64 %18, ptr %a, align 8
+ ret void
+}
+
More information about the llvm-commits
mailing list