[llvm] b9d3da8 - [SLP]Fix PR105904: the root node might be a gather node without user for reductions.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 26 07:09:26 PDT 2024
Author: Alexey Bataev
Date: 2024-08-26T07:09:05-07:00
New Revision: b9d3da8c8d277a7fc2223c659122bb377a0e54e0
URL: https://github.com/llvm/llvm-project/commit/b9d3da8c8d277a7fc2223c659122bb377a0e54e0
DIFF: https://github.com/llvm/llvm-project/commit/b9d3da8c8d277a7fc2223c659122bb377a0e54e0.diff
LOG: [SLP]Fix PR105904: the root node might be a gather node without user for reductions.
Before checking the user components of a gather/buildvector node, we
need to check whether the node has any users at all. A root node might
not have users if it is the root node of a reduction.
Fixes https://github.com/llvm/llvm-project/issues/105904
Added:
llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 949579772b94d5..def73e8d0c0db7 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3011,7 +3011,8 @@ class BoUpSLP {
}
bool isOperandGatherNode(const EdgeInfo &UserEI) const {
- return isGather() && UserTreeIndices.front().EdgeIdx == UserEI.EdgeIdx &&
+ return isGather() && (Idx > 0 || !UserTreeIndices.empty()) &&
+ UserTreeIndices.front().EdgeIdx == UserEI.EdgeIdx &&
UserTreeIndices.front().UserTE == UserEI.UserTE;
}
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll
new file mode 100644
index 00000000000000..f07b6bbe8d6621
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mattr=+v,+zvl512b < %s | FileCheck %s
+
+define void @test(ptr %c) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[C]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <8 x ptr> [[TMP1]], <8 x i64> <i64 222, i64 228, i64 276, i64 279, i64 282, i64 285, i64 288, i64 0>
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <8 x ptr> [[TMP1]], <8 x i64> <i64 0, i64 345, i64 348, i64 351, i64 354, i64 357, i64 360, i64 363>
+; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP2]], i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> poison)
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP3]], i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> poison)
+; CHECK-NEXT: br label %[[FOR_COND:.*]]
+; CHECK: [[FOR_COND]]:
+; CHECK-NEXT: [[A_PROMOTED2226:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[TMP8:%.*]], %[[FOR_COND]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[TMP4]], i64 0)
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP6]], <8 x i8> [[TMP5]], i64 8)
+; CHECK-NEXT: [[TMP8]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP7]])
+; CHECK-NEXT: br label %[[FOR_COND]]
+;
+entry:
+ %arrayidx8.5.3 = getelementptr i8, ptr %c, i64 222
+ %0 = load i8, ptr %arrayidx8.5.3, align 1
+ %arrayidx8.7.3 = getelementptr i8, ptr %c, i64 228
+ %1 = load i8, ptr %arrayidx8.7.3, align 1
+ %arrayidx8.434 = getelementptr i8, ptr %c, i64 276
+ %2 = load i8, ptr %arrayidx8.434, align 1
+ %arrayidx8.1.4 = getelementptr i8, ptr %c, i64 279
+ %3 = load i8, ptr %arrayidx8.1.4, align 1
+ %arrayidx8.2.4 = getelementptr i8, ptr %c, i64 282
+ %4 = load i8, ptr %arrayidx8.2.4, align 1
+ %arrayidx8.3.4 = getelementptr i8, ptr %c, i64 285
+ %5 = load i8, ptr %arrayidx8.3.4, align 1
+ %arrayidx8.4.4 = getelementptr i8, ptr %c, i64 288
+ %6 = load i8, ptr %arrayidx8.4.4, align 1
+ %7 = load i8, ptr %c, align 1
+ %8 = load i8, ptr %c, align 1
+ %arrayidx8.536 = getelementptr i8, ptr %c, i64 345
+ %9 = load i8, ptr %arrayidx8.536, align 1
+ %arrayidx8.1.5 = getelementptr i8, ptr %c, i64 348
+ %10 = load i8, ptr %arrayidx8.1.5, align 1
+ %arrayidx8.2.5 = getelementptr i8, ptr %c, i64 351
+ %11 = load i8, ptr %arrayidx8.2.5, align 1
+ %arrayidx8.3.5 = getelementptr i8, ptr %c, i64 354
+ %12 = load i8, ptr %arrayidx8.3.5, align 1
+ %arrayidx8.4.5 = getelementptr i8, ptr %c, i64 357
+ %13 = load i8, ptr %arrayidx8.4.5, align 1
+ %arrayidx8.5.5 = getelementptr i8, ptr %c, i64 360
+ %14 = load i8, ptr %arrayidx8.5.5, align 1
+ %arrayidx8.6.5 = getelementptr i8, ptr %c, i64 363
+ %15 = load i8, ptr %arrayidx8.6.5, align 1
+ br label %for.cond
+
+for.cond:
+ %a.promoted2226 = phi i8 [ 0, %entry ], [ %or18.6.5, %for.cond ]
+ %or18.7.3 = or i8 %0, %1
+ %or18.435 = or i8 %or18.7.3, %2
+ %or18.1.4 = or i8 %or18.435, %3
+ %or18.2.4 = or i8 %or18.1.4, %4
+ %or18.3.4 = or i8 %or18.2.4, %5
+ %or18.4.4 = or i8 %or18.3.4, %6
+ %or18.5.4 = or i8 %or18.4.4, %7
+ %or18.6.4 = or i8 %or18.5.4, %8
+ %or18.537 = or i8 %or18.6.4, %9
+ %or18.1.5 = or i8 %or18.537, %10
+ %or18.2.5 = or i8 %or18.1.5, %11
+ %or18.3.5 = or i8 %or18.2.5, %12
+ %or18.4.5 = or i8 %or18.3.5, %13
+ %or18.5.5 = or i8 %or18.4.5, %14
+ %or18.6.5 = or i8 %or18.5.5, %15
+ br label %for.cond
+}
+
More information about the llvm-commits
mailing list