[llvm] db6ba82 - [SLP] Do not match the gather node with copyable parent, containing insert instruction
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 29 11:50:58 PDT 2025
Author: Alexey Bataev
Date: 2025-10-29T11:50:47-07:00
New Revision: db6ba82acc767651ee59d249d717706be7239953
URL: https://github.com/llvm/llvm-project/commit/db6ba82acc767651ee59d249d717706be7239953
DIFF: https://github.com/llvm/llvm-project/commit/db6ba82acc767651ee59d249d717706be7239953.diff
LOG: [SLP] Do not match the gather node with copyable parent, containing insert instruction
If a gather/buildvector node has a match, and this matching node has
a scheduled copyable parent, and the parent node of the original node
has a last instruction that is non-schedulable and is part of the
scheduled copyable parent, then the matching node should be excluded as
non-matching, since it produces a wrong def-use chain.
Fixes #165435
Added:
llvm/test/Transforms/SLPVectorizer/X86/gathered-node-with-in-order-parent.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 43166c035fe7a..1b55a3b235228 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -16920,7 +16920,10 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
// otherwise TEPtr depends on TE.
if ((TEInsertBlock != InsertPt->getParent() ||
TEUseEI.EdgeIdx < UseEI.EdgeIdx || TEUseEI.UserTE != UseEI.UserTE) &&
- !CheckOrdering(InsertPt))
+ (!CheckOrdering(InsertPt) ||
+ (UseEI.UserTE->hasCopyableElements() &&
+ isUsedOutsideBlock(const_cast<Instruction *>(TEInsertPt)) &&
+ is_contained(UseEI.UserTE->Scalars, TEInsertPt))))
continue;
// The node is reused - exit.
if (CheckAndUseSameNode(TEPtr))
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gathered-node-with-in-order-parent.ll b/llvm/test/Transforms/SLPVectorizer/X86/gathered-node-with-in-order-parent.ll
new file mode 100644
index 0000000000000..260de1cc2b76a
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gathered-node-with-in-order-parent.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define double @test() {
+; CHECK-LABEL: define double @test() {
+; CHECK-NEXT: [[BB:.*]]:
+; CHECK-NEXT: br label %[[BB1:.*]]
+; CHECK: [[BB1]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP3:%.*]], %[[BB4:.*]] ]
+; CHECK-NEXT: br label %[[BB4]]
+; CHECK: [[BB4]]:
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 0, 1
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[MUL]], i32 0
+; CHECK-NEXT: [[TMP3]] = or <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 1>, i32 [[MUL]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> [[TMP0]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP5]], i32 2
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[TMP6]], 0
+; CHECK-NEXT: br i1 false, label %[[BB7:.*]], label %[[BB1]]
+; CHECK: [[BB7]]:
+; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ [[TMP5]], %[[BB4]] ]
+; CHECK-NEXT: ret double 0.000000e+00
+;
+bb:
+ br label %bb1
+
+bb1:
+ %phi = phi i32 [ 0, %bb ], [ 0, %bb4 ]
+ %phi2 = phi i32 [ 0, %bb ], [ 0, %bb4 ]
+ %phi3 = phi i32 [ 0, %bb ], [ %or5, %bb4 ]
+ br label %bb4
+
+bb4:
+ %or = or i32 %phi2, 0
+ %mul = mul i32 0, 1
+ %or5 = or i32 %phi3, %mul
+ %and = and i32 %or, 0
+ %or6 = or i32 %phi2, 1
+ br i1 false, label %bb7, label %bb1
+
+bb7:
+ %phi8 = phi i32 [ %phi, %bb4 ]
+ %phi9 = phi i32 [ %or, %bb4 ]
+ %phi10 = phi i32 [ %or5, %bb4 ]
+ %phi11 = phi i32 [ %or6, %bb4 ]
+ ret double 0.000000e+00
+}
+
More information about the llvm-commits
mailing list