[llvm] 96806a7 - [SLP]Gather copyable node, if its parent is copyable, but this node is still used outside of the block only
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 6 11:17:17 PST 2025
Author: Alexey Bataev
Date: 2025-11-06T11:16:55-08:00
New Revision: 96806a7ec35f0e46132801c201ef53969f09ca81
URL: https://github.com/llvm/llvm-project/commit/96806a7ec35f0e46132801c201ef53969f09ca81
DIFF: https://github.com/llvm/llvm-project/commit/96806a7ec35f0e46132801c201ef53969f09ca81.diff
LOG: [SLP]Gather copyable node, if its parent is copyable, but this node is still used outside of the block only
If the current node is a copyable node, its parent is copyable too, and
the current node is still used only outside of the block, it is better to
cancel scheduling for such a node, because otherwise a wrong def-use chain
might be built during vectorization.
Fixes #166775
Added:
llvm/test/Transforms/SLPVectorizer/X86/copyable-child-node-used-outside.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bf3f52c51b64c..df835a077f2a0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -20996,6 +20996,15 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
return false;
}))
return std::nullopt;
+ if (S.areInstructionsWithCopyableElements() && EI && EI.UserTE->hasState() &&
+ EI.UserTE->hasCopyableElements() &&
+ EI.UserTE->getMainOp()->getParent() == S.getMainOp()->getParent() &&
+ all_of(VL, [&](Value *V) {
+ if (S.isCopyableElement(V))
+ return true;
+ return isUsedOutsideBlock(V);
+ }))
+ return std::nullopt;
bool HasCopyables = S.areInstructionsWithCopyableElements();
if (((!HasCopyables && doesNotNeedToSchedule(VL)) ||
all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/copyable-child-node-used-outside.ll b/llvm/test/Transforms/SLPVectorizer/X86/copyable-child-node-used-outside.ll
new file mode 100644
index 0000000000000..65975199e46b8
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/copyable-child-node-used-outside.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define <4 x i32> @test() {
+; CHECK-LABEL: define <4 x i32> @test() {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 0 to i32
+; CHECK-NEXT: br label %[[BB1:.*]]
+; CHECK: [[BB1]]:
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[TRUNC]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[TRUNC]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 0, i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 0>
+; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[OR]] to i64
+; CHECK-NEXT: br label %[[BB3:.*]]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+bb:
+ %trunc = trunc i64 0 to i32
+ br label %bb1
+
+bb1:
+ %or = or i32 %trunc, 0
+ %zext = zext i32 %or to i64
+ %and = and i32 0, 0
+ %or2 = or i32 %trunc, 0
+ br label %bb3
+
+bb3:
+ %0 = insertelement <4 x i32> zeroinitializer, i32 %trunc, i32 0
+ %1 = insertelement <4 x i32> %0, i32 %and, i32 1
+ %2 = insertelement <4 x i32> %1, i32 %or2, i32 2
+ %3 = insertelement <4 x i32> %2, i32 %or, i32 3
+ ret <4 x i32> %3
+}
More information about the llvm-commits
mailing list