[llvm] 418a987 - [SLP]Do not use node, if it is a subvector or buildvector node
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 26 13:30:17 PST 2025
Author: Alexey Bataev
Date: 2025-02-26T13:25:37-08:00
New Revision: 418a9872851ef5342b29baa36dd672129f129953
URL: https://github.com/llvm/llvm-project/commit/418a9872851ef5342b29baa36dd672129f129953
DIFF: https://github.com/llvm/llvm-project/commit/418a9872851ef5342b29baa36dd672129f129953.diff
LOG: [SLP]Do not use node, if it is a subvector or buildvector node
If the buildvector has some matches with another node, which is
a subvector of another buildvector node, need to check for this and
cancel matching to avoid incorrect ordering of the nodes.
Fixes #128770
Added:
llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 02fea2eaf9d41..b25b09306aca8 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13353,6 +13353,14 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
return EdgeInfo(const_cast<TreeEntry *>(TE), 0);
return TE->UserTreeIndex;
};
+ auto HasGatherUser = [&](const TreeEntry *TE) {
+ while (TE->Idx != 0 && TE->UserTreeIndex) {
+ if (TE->UserTreeIndex.EdgeIdx == UINT_MAX)
+ return true;
+ TE = TE->UserTreeIndex.UserTE;
+ }
+ return false;
+ };
const EdgeInfo TEUseEI = GetUserEntry(TE);
if (!TEUseEI)
return std::nullopt;
@@ -13453,7 +13461,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
// If the user instruction is used for some reason in different
// vectorized nodes - make it depend on index.
if (TEUseEI.UserTE != UseEI.UserTE &&
- TEUseEI.UserTE->Idx < UseEI.UserTE->Idx)
+ (TEUseEI.UserTE->Idx < UseEI.UserTE->Idx ||
+ HasGatherUser(TEUseEI.UserTE)))
continue;
}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll
new file mode 100644
index 0000000000000..5d2f059a8cf41
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck %s
+
+define void @test(ptr %0, i64 %1, i64 %2) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[TMP0:%.*]], i64 [[TMP1:%.*]], i64 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x ptr> [[TMP4]], <4 x ptr> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint <4 x ptr> [[TMP5]] to <4 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint <4 x ptr> [[TMP5]] to <4 x i64>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 2, i32 2, i32 1, i32 3, i32 1>
+; CHECK-NEXT: br [[DOTPREHEADER_LR_PH:label %.*]]
+; CHECK: [[_PREHEADER_LR_PH:.*:]]
+; CHECK-NEXT: br [[DOTPREHEADER_US_US_PREHEADER:label %.*]]
+; CHECK: [[_PREHEADER_US_US_PREHEADER:.*:]]
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i64> poison, i64 [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i64> [[TMP9]], i64 [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i64> [[TMP10]], <8 x i64> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP11]], <4 x i64> [[TMP6]], i64 4)
+; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP8]]
+; CHECK-NEXT: br [[DOTPREHEADER_US_US:label %.*]]
+; CHECK: [[_PREHEADER_US_US:.*:]]
+; CHECK-NEXT: [[TMP14:%.*]] = icmp ult <8 x i64> [[TMP13]], splat (i64 32)
+; CHECK-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP14]])
+; CHECK-NEXT: br i1 [[TMP15]], label %[[SCALAR_PH:.*]], [[DOTPREHEADER_US_US]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: ret void
+;
+ %4 = ptrtoint ptr %0 to i64
+ %5 = ptrtoint ptr %0 to i64
+ %6 = ptrtoint ptr %0 to i64
+ %7 = ptrtoint ptr %0 to i64
+ %8 = ptrtoint ptr %0 to i64
+ br label %.preheader.lr.ph
+
+.preheader.lr.ph:
+ br label %.preheader.us.us.preheader
+
+.preheader.us.us.preheader:
+ %9 = or i64 %1, %7
+ %10 = or i64 %2, %7
+ %11 = or i64 %1, %4
+ %12 = or i64 %1, %8
+ %13 = or i64 %5, %8
+ %14 = or i64 %8, %4
+ %15 = or i64 %5, %6
+ %16 = or i64 %6, %4
+ br label %.preheader.us.us
+
+.preheader.us.us:
+ %diff.check1 = icmp ult i64 %9, 32
+ %diff.check2 = icmp ult i64 %10, 32
+ %conflict.rdx3 = or i1 %diff.check1, %diff.check2
+ %diff.check4 = icmp ult i64 %11, 32
+ %conflict.rdx5 = or i1 %conflict.rdx3, %diff.check4
+ %diff.check6 = icmp ult i64 %12, 32
+ %conflict.rdx7 = or i1 %conflict.rdx5, %diff.check6
+ %diff.check8 = icmp ult i64 %13, 32
+ %conflict.rdx9 = or i1 %conflict.rdx7, %diff.check8
+ %diff.check10 = icmp ult i64 %14, 32
+ %conflict.rdx11 = or i1 %conflict.rdx9, %diff.check10
+ %diff.check12 = icmp ult i64 %15, 32
+ %conflict.rdx13 = or i1 %conflict.rdx11, %diff.check12
+ %diff.check14 = icmp ult i64 %16, 32
+ %conflict.rdx15 = or i1 %conflict.rdx13, %diff.check14
+ br i1 %conflict.rdx15, label %scalar.ph, label %.preheader.us.us
+
+scalar.ph:
+ ret void
+}
More information about the llvm-commits
mailing list