[llvm] eb14d2a - [SLP]Fix check for matched gather node, if it is a subvector node
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 24 06:48:53 PST 2025
Author: Alexey Bataev
Date: 2025-02-24T06:48:43-08:00
New Revision: eb14d2a1d48fe76c5faed71a08547135a35ee742
URL: https://github.com/llvm/llvm-project/commit/eb14d2a1d48fe76c5faed71a08547135a35ee742
DIFF: https://github.com/llvm/llvm-project/commit/eb14d2a1d48fe76c5faed71a08547135a35ee742.diff
LOG: [SLP]Fix check for matched gather node, if it is a subvector node
If the gather node is a subvector node, it may match an existing
vector/gather node in the graph but may still require reordering. In
this case, its dependencies need to be fully checked to prevent a
compiler crash.
Fixes #128401
Added:
llvm/test/Transforms/SLPVectorizer/X86/bv-matched-node-reorder.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bf256d82ae17d..5fc5fb10fad55 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13320,9 +13320,16 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
Entries.clear();
// TODO: currently checking only for Scalars in the tree entry, need to count
// reused elements too for better cost estimation.
- const EdgeInfo &TEUseEI = TE == VectorizableTree.front().get()
- ? EdgeInfo(const_cast<TreeEntry *>(TE), 0)
- : TE->UserTreeIndex;
+ auto GetUserEntry = [&](const TreeEntry *TE) {
+ while (TE->UserTreeIndex && TE->UserTreeIndex.EdgeIdx == UINT_MAX)
+ TE = TE->UserTreeIndex.UserTE;
+ if (TE == VectorizableTree.front().get())
+ return EdgeInfo(const_cast<TreeEntry *>(TE), 0);
+ return TE->UserTreeIndex;
+ };
+ const EdgeInfo TEUseEI = GetUserEntry(TE);
+ if (!TEUseEI)
+ return std::nullopt;
const Instruction *TEInsertPt = &getLastInstructionInBundle(TEUseEI.UserTE);
const BasicBlock *TEInsertBlock = nullptr;
// Main node of PHI entries keeps the correct order of operands/incoming
@@ -13874,15 +13881,13 @@ BoUpSLP::isGatherShuffledEntry(
assert(VL.size() % NumParts == 0 &&
"Number of scalars must be divisible by NumParts.");
if (TE->UserTreeIndex && TE->UserTreeIndex.UserTE->isGather() &&
- TE->UserTreeIndex.EdgeIdx == UINT_MAX) {
- assert(
- (TE->Idx == 0 ||
- (TE->hasState() && TE->getOpcode() == Instruction::ExtractElement) ||
- isSplat(TE->Scalars) ||
- getSameValuesTreeEntry(TE->getMainOp(), TE->Scalars)) &&
- "Expected splat or extractelements only node.");
+ TE->UserTreeIndex.EdgeIdx == UINT_MAX &&
+ (TE->Idx == 0 ||
+ (TE->hasState() && TE->getOpcode() == Instruction::ExtractElement) ||
+ isSplat(TE->Scalars) ||
+ (TE->hasState() &&
+ getSameValuesTreeEntry(TE->getMainOp(), TE->Scalars))))
return {};
- }
unsigned SliceSize = getPartNumElems(VL.size(), NumParts);
SmallVector<std::optional<TTI::ShuffleKind>> Res;
for (unsigned Part : seq<unsigned>(NumParts)) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bv-matched-node-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/bv-matched-node-reorder.ll
new file mode 100644
index 0000000000000..c12918f3bb81d
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bv-matched-node-reorder.ll
@@ -0,0 +1,114 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+@g = global [8 x i8] zeroinitializer
+
+define i32 @main() {
+; CHECK-LABEL: define i32 @main() {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[L2_I_I_I_I:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 5), align 1
+; CHECK-NEXT: [[LI_I_I_I:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 4), align 1
+; CHECK-NEXT: [[L1_I_I_I_I:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 7), align 1
+; CHECK-NEXT: [[L6_I_I_I_I:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 6), align 1
+; CHECK-NEXT: br i1 false, label %[[IF_END151_1_I_I_1_I_I_I:.*]], label %[[END:.*]]
+; CHECK: [[PRE:.*]]:
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[LI_I_I_I]], %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[L2_I_I_I_I]], %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[L6_I_I_I_I]], %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[L1_I_I_I_I]], %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[L1_I_I_I_I]], %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[L6_I_I_I_I]], %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[L2_I_I_I_I]], %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[LI_I_I_I]], %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[IF_END151_1_I_I_1_I_I_I]]
+; CHECK: [[IF_END151_1_I_I_1_I_I_I]]:
+; CHECK-NEXT: [[TMP8:%.*]] = phi i8 [ [[LI_I_I_I]], %[[ENTRY]] ], [ [[TMP7]], %[[END]] ]
+; CHECK-NEXT: [[TMP9:%.*]] = phi i8 [ [[L2_I_I_I_I]], %[[ENTRY]] ], [ [[TMP6]], %[[END]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = phi i8 [ [[L6_I_I_I_I]], %[[ENTRY]] ], [ [[TMP5]], %[[END]] ]
+; CHECK-NEXT: [[TMP11:%.*]] = phi i8 [ [[L1_I_I_I_I]], %[[ENTRY]] ], [ [[TMP4]], %[[END]] ]
+; CHECK-NEXT: [[TMP12:%.*]] = phi i8 [ [[L1_I_I_I_I]], %[[ENTRY]] ], [ [[TMP3]], %[[END]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[TMP2]], %[[END]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = phi i8 [ [[L2_I_I_I_I]], %[[ENTRY]] ], [ [[TMP1]], %[[END]] ]
+; CHECK-NEXT: [[TMP15:%.*]] = phi i8 [ [[LI_I_I_I]], %[[ENTRY]] ], [ [[TMP0]], %[[END]] ]
+; CHECK-NEXT: [[TMP16:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[TMP6]], %[[END]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = phi i8 [ [[L1_I_I_I_I]], %[[ENTRY]] ], [ [[TMP4]], %[[END]] ]
+; CHECK-NEXT: [[TMP18:%.*]] = phi i8 [ [[L6_I_I_I_I]], %[[ENTRY]] ], [ [[TMP5]], %[[END]] ]
+; CHECK-NEXT: [[TMP19:%.*]] = phi i8 [ [[L1_I_I_I_I]], %[[ENTRY]] ], [ [[TMP4]], %[[END]] ]
+; CHECK-NEXT: [[TMP20:%.*]] = phi i8 [ [[L6_I_I_I_I]], %[[ENTRY]] ], [ [[TMP5]], %[[END]] ]
+; CHECK-NEXT: [[TMP21:%.*]] = phi i8 [ [[L2_I_I_I_I]], %[[ENTRY]] ], [ [[TMP6]], %[[END]] ]
+; CHECK-NEXT: [[TMP22:%.*]] = phi i8 [ [[LI_I_I_I]], %[[ENTRY]] ], [ [[TMP7]], %[[END]] ]
+; CHECK-NEXT: [[TOBOOL_NOT_I_1121_I_1_I_I_I:%.*]] = icmp ne i8 [[TMP18]], 0
+; CHECK-NEXT: [[C1_1_1117_I_1_I9_I_I:%.*]] = icmp eq i8 [[TMP17]], 0
+; CHECK-NEXT: [[C1_I_1_I_I_I:%.*]] = icmp eq i8 [[TMP16]], 0
+; CHECK-NEXT: [[C1_1_I_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0
+; CHECK-NEXT: [[CMP258_I_1_I_I_I_I:%.*]] = icmp eq i8 [[TMP13]], 0
+; CHECK-NEXT: [[C1_I_I_I_I:%.*]] = icmp eq i8 [[TMP14]], 0
+; CHECK-NEXT: [[CMP258_I_I_I_I_I:%.*]] = icmp eq i8 [[TMP15]], 0
+; CHECK-NEXT: [[C1_1_1_I_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 0
+; CHECK-NEXT: [[CMP258_I_1_1_I_I_I_I:%.*]] = icmp eq i8 [[TMP10]], 0
+; CHECK-NEXT: [[C1_187_I_I_I_I:%.*]] = icmp eq i8 [[TMP9]], 0
+; CHECK-NEXT: [[CMP258_I_185_I_I_I_I:%.*]] = icmp eq i8 [[TMP8]], 0
+; CHECK-NEXT: [[C1_1_1117_I_I_I_I:%.*]] = icmp eq i8 [[TMP19]], 0
+; CHECK-NEXT: [[CMP258_I_1_1115_I_I_I_I:%.*]] = icmp eq i8 [[TMP20]], 0
+; CHECK-NEXT: [[C1_1113_I_I_I_I:%.*]] = icmp eq i8 [[TMP21]], 0
+; CHECK-NEXT: [[CMP258_I_1111_I_I_I_I:%.*]] = icmp eq i8 [[TMP22]], 0
+; CHECK-NEXT: [[C1_187_1_I_I_I_I:%.*]] = icmp eq i8 [[TMP16]], 0
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %l2.i.i.i.i = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 5), align 1
+ %li.i.i.i = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 4), align 1
+ %l1.i.i.i.i = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 7), align 1
+ %l6.i.i.i.i = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 6), align 1
+ br i1 false, label %if.end151.1.i.i.1.i.i.i, label %end
+
+pre:
+ br label %end
+
+end:
+ %0 = phi i8 [ 0, %pre ], [ %li.i.i.i, %entry ]
+ %1 = phi i8 [ 0, %pre ], [ %l2.i.i.i.i, %entry ]
+ %2 = phi i8 [ 0, %pre ], [ %l6.i.i.i.i, %entry ]
+ %3 = phi i8 [ 0, %pre ], [ %l1.i.i.i.i, %entry ]
+ %4 = phi i8 [ 0, %pre ], [ %l1.i.i.i.i, %entry ]
+ %5 = phi i8 [ 0, %pre ], [ %l6.i.i.i.i, %entry ]
+ %6 = phi i8 [ 0, %pre ], [ %l2.i.i.i.i, %entry ]
+ %7 = phi i8 [ 0, %pre ], [ %li.i.i.i, %entry ]
+ br label %if.end151.1.i.i.1.i.i.i
+
+if.end151.1.i.i.1.i.i.i:
+ %8 = phi i8 [ %li.i.i.i, %entry ], [ %7, %end ]
+ %9 = phi i8 [ %l2.i.i.i.i, %entry ], [ %6, %end ]
+ %10 = phi i8 [ %l6.i.i.i.i, %entry ], [ %5, %end ]
+ %11 = phi i8 [ %l1.i.i.i.i, %entry ], [ %4, %end ]
+ %12 = phi i8 [ %l1.i.i.i.i, %entry ], [ %3, %end ]
+ %13 = phi i8 [ 0, %entry ], [ %2, %end ]
+ %14 = phi i8 [ %l2.i.i.i.i, %entry ], [ %1, %end ]
+ %15 = phi i8 [ %li.i.i.i, %entry ], [ %0, %end ]
+ %16 = phi i8 [ 0, %entry ], [ %6, %end ]
+ %17 = phi i8 [ %l1.i.i.i.i, %entry ], [ %4, %end ]
+ %18 = phi i8 [ %l6.i.i.i.i, %entry ], [ %5, %end ]
+ %19 = phi i8 [ %l1.i.i.i.i, %entry ], [ %4, %end ]
+ %20 = phi i8 [ %l6.i.i.i.i, %entry ], [ %5, %end ]
+ %21 = phi i8 [ %l2.i.i.i.i, %entry ], [ %6, %end ]
+ %22 = phi i8 [ %li.i.i.i, %entry ], [ %7, %end ]
+ %tobool.not.i.1121.i.1.i.i.i = icmp ne i8 %18, 0
+ %c1.1.1117.i.1.i9.i.i = icmp eq i8 %17, 0
+ %c1.i.1.i.i.i = icmp eq i8 %16, 0
+ %c1.1.i.i.i.i = icmp eq i8 %12, 0
+ %cmp258.i.1.i.i.i.i = icmp eq i8 %13, 0
+ %c1.i.i.i.i = icmp eq i8 %14, 0
+ %cmp258.i.i.i.i.i = icmp eq i8 %15, 0
+ %c1.1.1.i.i.i.i = icmp eq i8 %11, 0
+ %cmp258.i.1.1.i.i.i.i = icmp eq i8 %10, 0
+ %c1.187.i.i.i.i = icmp eq i8 %9, 0
+ %cmp258.i.185.i.i.i.i = icmp eq i8 %8, 0
+ %c1.1.1117.i.i.i.i = icmp eq i8 %19, 0
+ %cmp258.i.1.1115.i.i.i.i = icmp eq i8 %20, 0
+ %c1.1113.i.i.i.i = icmp eq i8 %21, 0
+ %cmp258.i.1111.i.i.i.i = icmp eq i8 %22, 0
+ %c1.187.1.i.i.i.i = icmp eq i8 %16, 0
+ ret i32 0
+}
More information about the llvm-commits
mailing list