[llvm] cee7d99 - [SLP]Fix PR89438: Check for same vectorized node in MinBWs, not user.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 19 12:52:52 PDT 2024
Author: Alexey Bataev
Date: 2024-04-19T12:52:19-07:00
New Revision: cee7d994b94db625177cdfebcb8a6ce1ed677f85
URL: https://github.com/llvm/llvm-project/commit/cee7d994b94db625177cdfebcb8a6ce1ed677f85
DIFF: https://github.com/llvm/llvm-project/commit/cee7d994b94db625177cdfebcb8a6ce1ed677f85.diff
LOG: [SLP]Fix PR89438: Check for same vectorized node in MinBWs, not user.
Need to check if the buildvector node has a perfect diamond match in the
graph and if the matched node is resized.
Added:
llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-reduced.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2e1788b83f114d..1b56bb7b600c26 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13139,9 +13139,30 @@ Value *BoUpSLP::vectorizeTree(
assert(Vec->getType()->isIntOrIntVectorTy() &&
PrevVec->getType()->isIntOrIntVectorTy() &&
"Expected integer vector types only.");
- assert(MinBWs.contains(TE->UserTreeIndices.front().UserTE) &&
- "Expected user in MinBWs.");
- bool IsSigned = MinBWs.lookup(TE->UserTreeIndices.front().UserTE).second;
+ std::optional<std::pair<unsigned long, bool>> Res;
+ if (const TreeEntry *BaseTE = getTreeEntry(TE->Scalars.front())) {
+ SmallVector<const TreeEntry *> BaseTEs;
+ if (BaseTE->isSame(TE->Scalars))
+ BaseTEs.push_back(BaseTE);
+ auto It = MultiNodeScalars.find(TE->Scalars.front());
+ if (It != MultiNodeScalars.end()) {
+ for (const TreeEntry *MNTE : It->getSecond())
+ if (MNTE->isSame(TE->Scalars))
+ BaseTEs.push_back(MNTE);
+ }
+ const auto *BaseIt = find_if(BaseTEs, [&](const TreeEntry *BaseTE) {
+ return MinBWs.contains(BaseTE);
+ });
+ if (BaseIt != BaseTEs.end())
+ Res = MinBWs.lookup(*BaseIt);
+ }
+ if (!Res) {
+ assert(MinBWs.contains(TE->UserTreeIndices.front().UserTE) &&
+ "Expected user in MinBWs.");
+ Res = MinBWs.lookup(TE->UserTreeIndices.front().UserTE);
+ }
+ assert(Res && "Expected user node or perfect diamond match in MinBWs.");
+ bool IsSigned = Res->second;
Vec = Builder.CreateIntCast(Vec, PrevVec->getType(), IsSigned);
}
PrevVec->replaceAllUsesWith(Vec);
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-reduced.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-reduced.ll
new file mode 100644
index 00000000000000..b03eb9e67254be
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-reduced.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux < %s | FileCheck %s
+
+define i64 @test(ptr %p) {
+; CHECK-LABEL: define i64 @test(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P]], i64 12
+; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> zeroinitializer, zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i32> [[TMP5]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i32> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i32> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = trunc <4 x i32> [[TMP11]] to <4 x i8>
+; CHECK-NEXT: store <4 x i8> [[TMP12]], ptr [[TMP1]], align 1
+; CHECK-NEXT: ret i64 0
+;
+ %1 = getelementptr i8, ptr %p, i64 13
+ %2 = getelementptr i8, ptr %p, i64 14
+ %3 = getelementptr i8, ptr %p, i64 15
+ %4 = getelementptr i8, ptr %p, i64 12
+ %5 = zext i8 0 to i32
+ %6 = and i32 %5, 0
+ %.not866 = icmp eq i32 %6, 0
+ %7 = select i1 %.not866, i32 0, i32 0
+ %8 = xor i32 0, %7
+ %9 = zext i8 0 to i32
+ %10 = and i32 %9, 0
+ %.not871 = icmp eq i32 %10, 0
+ %11 = select i1 %.not871, i32 0, i32 0
+ %12 = xor i32 0, %11
+ %13 = xor i32 %9, 0
+ %14 = xor i32 %13, 0
+ %15 = xor i32 %14, 0
+ %16 = xor i32 %15, 0
+ %17 = xor i32 %16, 0
+ %18 = xor i32 %17, %12
+ %19 = xor i32 %18, 0
+ %20 = xor i32 %19, 0
+ %21 = xor i32 %20, 0
+ %22 = xor i32 %21, 0
+ %23 = trunc i32 %22 to i8
+ store i8 %23, ptr %4, align 1
+ %24 = xor i32 %9, 0
+ %25 = xor i32 %24, 0
+ %26 = xor i32 %25, 0
+ %27 = xor i32 %26, 0
+ %28 = xor i32 %27, 0
+ %29 = xor i32 %28, %8
+ %30 = xor i32 %29, 0
+ %31 = xor i32 %30, 0
+ %32 = xor i32 %31, 0
+ %33 = xor i32 %32, 0
+ %34 = trunc i32 %33 to i8
+ store i8 %34, ptr %1, align 1
+ %35 = xor i32 0, %5
+ %36 = xor i32 %35, 0
+ %37 = xor i32 %36, 0
+ %38 = xor i32 %37, 0
+ %39 = xor i32 %38, 0
+ %40 = xor i32 %39, %8
+ %41 = xor i32 %40, 0
+ %42 = xor i32 %41, 0
+ %43 = xor i32 %42, 0
+ %44 = xor i32 %43, 0
+ %45 = trunc i32 %44 to i8
+ store i8 %45, ptr %2, align 1
+ %46 = xor i32 %35, 0
+ %47 = xor i32 %46, 0
+ %48 = xor i32 %47, 0
+ %49 = xor i32 %48, 0
+ %50 = xor i32 %49, %8
+ %51 = xor i32 %50, 0
+ %52 = xor i32 %51, 0
+ %53 = xor i32 %52, 0
+ %54 = xor i32 %53, 0
+ %55 = trunc i32 %54 to i8
+ store i8 %55, ptr %3, align 1
+ ret i64 0
+}
More information about the llvm-commits
mailing list