[llvm] db6ba82 - [SLP] Do not match the gather node with copyable parent, containing insert instruction

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 29 11:50:58 PDT 2025


Author: Alexey Bataev
Date: 2025-10-29T11:50:47-07:00
New Revision: db6ba82acc767651ee59d249d717706be7239953

URL: https://github.com/llvm/llvm-project/commit/db6ba82acc767651ee59d249d717706be7239953
DIFF: https://github.com/llvm/llvm-project/commit/db6ba82acc767651ee59d249d717706be7239953.diff

LOG: [SLP] Do not match the gather node with copyable parent, containing insert instruction

If a gather/buildvector node has a matching node, and that matching node
has a scheduled copyable parent, and the parent node of the original node
has a last instruction that is non-schedulable and is part of the
scheduled copyable parent, then the matching node should be excluded as
non-matching, since it produces a wrong def-use chain.

Fixes #165435

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/gathered-node-with-in-order-parent.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 43166c035fe7a..1b55a3b235228 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -16920,7 +16920,10 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
       // otherwise TEPtr depends on TE.
       if ((TEInsertBlock != InsertPt->getParent() ||
            TEUseEI.EdgeIdx < UseEI.EdgeIdx || TEUseEI.UserTE != UseEI.UserTE) &&
-          !CheckOrdering(InsertPt))
+          (!CheckOrdering(InsertPt) ||
+           (UseEI.UserTE->hasCopyableElements() &&
+            isUsedOutsideBlock(const_cast<Instruction *>(TEInsertPt)) &&
+            is_contained(UseEI.UserTE->Scalars, TEInsertPt))))
         continue;
       // The node is reused - exit.
       if (CheckAndUseSameNode(TEPtr))

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/gathered-node-with-in-order-parent.ll b/llvm/test/Transforms/SLPVectorizer/X86/gathered-node-with-in-order-parent.ll
new file mode 100644
index 0000000000000..260de1cc2b76a
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gathered-node-with-in-order-parent.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define double @test() {
+; CHECK-LABEL: define double @test() {
+; CHECK-NEXT:  [[BB:.*]]:
+; CHECK-NEXT:    br label %[[BB1:.*]]
+; CHECK:       [[BB1]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP3:%.*]], %[[BB4:.*]] ]
+; CHECK-NEXT:    br label %[[BB4]]
+; CHECK:       [[BB4]]:
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 0, 1
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[MUL]], i32 0
+; CHECK-NEXT:    [[TMP3]] = or <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 1>, i32 [[MUL]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = or <4 x i32> [[TMP0]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP5]], i32 2
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[TMP6]], 0
+; CHECK-NEXT:    br i1 false, label %[[BB7:.*]], label %[[BB1]]
+; CHECK:       [[BB7]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = phi <4 x i32> [ [[TMP5]], %[[BB4]] ]
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+bb:
+  br label %bb1
+
+bb1:
+  %phi = phi i32 [ 0, %bb ], [ 0, %bb4 ]
+  %phi2 = phi i32 [ 0, %bb ], [ 0, %bb4 ]
+  %phi3 = phi i32 [ 0, %bb ], [ %or5, %bb4 ]
+  br label %bb4
+
+bb4:
+  %or = or i32 %phi2, 0
+  %mul = mul i32 0, 1
+  %or5 = or i32 %phi3, %mul
+  %and = and i32 %or, 0
+  %or6 = or i32 %phi2, 1
+  br i1 false, label %bb7, label %bb1
+
+bb7:
+  %phi8 = phi i32 [ %phi, %bb4 ]
+  %phi9 = phi i32 [ %or, %bb4 ]
+  %phi10 = phi i32 [ %or5, %bb4 ]
+  %phi11 = phi i32 [ %or6, %bb4 ]
+  ret double 0.000000e+00
+}
+


        


More information about the llvm-commits mailing list