[llvm] 0dddfab - [SLP]Recalculate deps if the original instruction scheduled after being copyable

Wed Sep 10 10:18:56 PDT 2025

Author: Alexey Bataev
Date: 2025-09-10T10:18:45-07:00
New Revision: 0dddfab54cc3091ca6d29d9b733f0987ed79dc16

URL: https://github.com/llvm/llvm-project/commit/0dddfab54cc3091ca6d29d9b733f0987ed79dc16
DIFF: https://github.com/llvm/llvm-project/commit/0dddfab54cc3091ca6d29d9b733f0987ed79dc16.diff

LOG: [SLP]Recalculate deps if the original instruction scheduled after being copyable

If the original instruction is going to be scheduled after the same
instruction has already been scheduled as copyable, its dependencies need
to be recalculated. Otherwise, the dependencies may be calculated incorrectly.

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/original-inst-scheduled-after-copyable.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1cfcd3ffbd664..6a56dbfaa0157 100644

--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -20788,6 +20788,14 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
           continue;
         }
         auto *SD = cast<ScheduleData>(SE);
+        if (SD->hasValidDependencies() &&
+            (!S.areInstructionsWithCopyableElements() ||
+             !S.isCopyableElement(SD->getInst())) &&
+            !getScheduleCopyableData(SD->getInst()).empty() && EI.UserTE &&
+            EI.UserTE->hasState() &&
+            (!EI.UserTE->hasCopyableElements() ||
+             !EI.UserTE->isCopyableElement(SD->getInst())))
+          SD->clearDirectDependencies();
         for (const Use &U : SD->getInst()->operands()) {
           unsigned &NumOps =
               UserOpToNumOps

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/original-inst-scheduled-after-copyable.ll b/llvm/test/Transforms/SLPVectorizer/X86/original-inst-scheduled-after-copyable.ll
new file mode 100644
index 0000000000000..19eb7bf4dfc94
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/original-inst-scheduled-after-copyable.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-10 < %s | FileCheck %s
+
+define void @test(ptr %0, i32 %1, i32 %2) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i64 48
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i64 56
+; CHECK-NEXT:    [[TMP7:%.*]] = and i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[ADD_NARROWED_I_I:%.*]] = shl i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP10:%.*]] = lshr i32 [[TMP7]], 1
+; CHECK-NEXT:    [[TMP18:%.*]] = zext i32 [[ADD_NARROWED_I_I]] to i64
+; CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[TMP18]], -1
+; CHECK-NEXT:    [[TMP21:%.*]] = trunc i64 [[TMP19]] to i32
+; CHECK-NEXT:    [[TMP28:%.*]] = insertelement <2 x i32> poison, i32 [[TMP21]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP28]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP12:%.*]] = and <2 x i32> [[TMP11]], splat (i32 -2)
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <2 x i32> <i32 poison, i32 -2>, i32 [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], [[TMP12]]
+; CHECK-NEXT:    [[TMP15:%.*]] = xor <2 x i32> [[TMP13]], [[TMP12]]
+; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP17:%.*]] = load <2 x i32>, ptr [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <2 x i32> <i32 1, i32 poison>, i32 [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP33:%.*]] = and <2 x i32> [[TMP17]], [[TMP32]]
+; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr null)
+; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x i32> [[TMP33]], <2 x i32> poison, <2 x i32> <i32 poison, i32 0>
+; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <2 x i32> [[TMP20]], i32 [[TMP10]], i32 0
+; CHECK-NEXT:    [[TMP22:%.*]] = zext <2 x i32> [[TMP34]] to <2 x i64>
+; CHECK-NEXT:    [[TMP23:%.*]] = zext <2 x i32> [[TMP33]] to <2 x i64>
+; CHECK-NEXT:    [[TMP35:%.*]] = shl <2 x i64> [[TMP23]], splat (i64 1)
+; CHECK-NEXT:    [[TMP25:%.*]] = or <2 x i64> [[TMP35]], [[TMP22]]
+; CHECK-NEXT:    [[TMP26:%.*]] = trunc <2 x i64> [[TMP25]] to <2 x i32>
+; CHECK-NEXT:    [[TMP27:%.*]] = trunc <2 x i64> [[TMP25]] to <2 x i32>
+; CHECK-NEXT:    [[TMP24:%.*]] = tail call i32 asm sideeffect "", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 0)
+; CHECK-NEXT:    store <2 x i32> [[TMP16]], ptr [[TMP3]], align 16
+; CHECK-NEXT:    [[TMP29:%.*]] = shufflevector <2 x i32> [[TMP32]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP30:%.*]] = and <2 x i32> [[TMP29]], [[TMP26]]
+; CHECK-NEXT:    [[TMP31:%.*]] = or <2 x i32> [[TMP30]], [[TMP27]]
+; CHECK-NEXT:    store <2 x i32> [[TMP31]], ptr [[TMP5]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %3 = getelementptr i8, ptr %0, i64 48
+  %4 = getelementptr i8, ptr %0, i64 52
+  %5 = getelementptr i8, ptr %0, i64 56
+  %6 = getelementptr i8, ptr %0, i64 60
+  %.pre21.i = load i32, ptr %5, align 8
+  %.pre23.i = load i32, ptr %6, align 4
+  %7 = and i32 %2, %1
+  %8 = and i32 %.pre21.i, 1
+  %9 = and i32 %1, %.pre23.i
+  call void @llvm.stackrestore.p0(ptr null)
+  %add.narrowed.i.i = shl i32 %1, 1
+  %10 = lshr i32 %7, 1
+  %11 = zext i32 %10 to i64
+  %12 = zext i32 %8 to i64
+  %reass.add1.i = shl i64 %12, 1
+  %13 = or i64 %reass.add1.i, %11
+  %14 = trunc i64 %13 to i32
+  %15 = zext i32 %9 to i64
+  %reass.add2.i = shl i64 %15, 1
+  %16 = or i64 %reass.add2.i, %12
+  %17 = trunc i64 %16 to i32
+  %18 = zext i32 %add.narrowed.i.i to i64
+  %19 = add i64 %18, -1
+  %20 = trunc i64 %19 to i32
+  %21 = trunc i64 %19 to i32
+  %22 = trunc i64 %13 to i32
+  %23 = trunc i64 %16 to i32
+  %24 = tail call i32 asm sideeffect "", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 0)
+  %25 = and i32 %20, -2
+  %26 = or i32 %1, %25
+  store i32 %26, ptr %3, align 16
+  %27 = and i32 %21, -2
+  %28 = xor i32 %27, -2
+  store i32 %28, ptr %4, align 4
+  %29 = and i32 %1, %14
+  %30 = or i32 %29, %22
+  store i32 %30, ptr %5, align 8
+  %31 = and i32 %1, %17
+  %32 = or i32 %31, %23
+  store i32 %32, ptr %6, align 4
+  ret void
+}
+
+declare void @llvm.stackrestore.p0(ptr) #0
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn }