[llvm] dd5ba69 - [SLP]Recalculate deps for potential control-dependent schedule data

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 13 08:18:36 PDT 2025


Author: Alexey Bataev
Date: 2025-08-13T08:18:26-07:00
New Revision: dd5ba694bde6c92259c518b9eb2888163a812433

URL: https://github.com/llvm/llvm-project/commit/dd5ba694bde6c92259c518b9eb2888163a812433
DIFF: https://github.com/llvm/llvm-project/commit/dd5ba694bde6c92259c518b9eb2888163a812433.diff

LOG: [SLP]Recalculate deps for potential control-dependent schedule data

After clearing the dependencies in copyable data, we need to recalculate
the dependencies for the original ScheduleData if it can be marked as
control dependent.

Fixes #153289

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/control-dependent-schedule.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 98011174bd703..a3cb4d138789c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5689,7 +5689,8 @@ class BoUpSLP {
     /// Updates the dependency information of a bundle and of all instructions/
     /// bundles which depend on the original bundle.
     void calculateDependencies(ScheduleBundle &Bundle, bool InsertInReadyList,
-                               BoUpSLP *SLP);
+                               BoUpSLP *SLP,
+                               ArrayRef<ScheduleData *> ControlDeps = {});
 
     /// Sets all instruction in the scheduling region to un-scheduled.
     void resetSchedule();
@@ -20727,15 +20728,21 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
   LLVM_DEBUG(dbgs() << "SLP:  bundle: " << *S.getMainOp() << "\n");
 
   auto TryScheduleBundleImpl = [=](bool ReSchedule, ScheduleBundle &Bundle) {
-    // Clear deps or reculate the region, if the memory instruction is a
-    // copyable. It may have memory deps, which must be reaculated.
+    // Clear deps or recalculate the region, if the memory instruction is a
+    // copyable. It may have memory deps, which must be recalculated.
+    SmallVector<ScheduleData *> ControlDependentMembers;
     auto CheckIfNeedToClearDeps = [&](ScheduleBundle &Bundle) {
       SmallDenseMap<std::pair<Instruction *, Value *>, unsigned> UserOpToNumOps;
       for (ScheduleEntity *SE : Bundle.getBundle()) {
         if (ScheduleCopyableData *SD = dyn_cast<ScheduleCopyableData>(SE)) {
           if (ScheduleData *BundleMember = getScheduleData(SD->getInst());
-              BundleMember && BundleMember->hasValidDependencies())
+              BundleMember && BundleMember->hasValidDependencies()) {
             BundleMember->clearDirectDependencies();
+            if (RegionHasStackSave ||
+                !isGuaranteedToTransferExecutionToSuccessor(
+                    BundleMember->getInst()))
+              ControlDependentMembers.push_back(BundleMember);
+          }
           continue;
         }
         auto *SD = cast<ScheduleData>(SE);
@@ -20748,8 +20755,12 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
           if (auto *Op = dyn_cast<Instruction>(U.get());
               Op && areAllOperandsReplacedByCopyableData(SD->getInst(), Op,
                                                          *SLP, NumOps)) {
-            if (ScheduleData *OpSD = getScheduleData(Op))
+            if (ScheduleData *OpSD = getScheduleData(Op)) {
               OpSD->clearDirectDependencies();
+              if (RegionHasStackSave ||
+                  !isGuaranteedToTransferExecutionToSuccessor(OpSD->getInst()))
+                ControlDependentMembers.push_back(OpSD);
+            }
           }
         }
       }
@@ -20783,7 +20794,8 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
         CheckIfNeedToClearDeps(Bundle);
       LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << Bundle << " in block "
                         << BB->getName() << "\n");
-      calculateDependencies(Bundle, /*InsertInReadyList=*/!ReSchedule, SLP);
+      calculateDependencies(Bundle, /*InsertInReadyList=*/!ReSchedule, SLP,
+                            ControlDependentMembers);
     }
 
     if (ReSchedule) {
@@ -21048,9 +21060,9 @@ void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
   }
 }
 
-void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleBundle &Bundle,
-                                                     bool InsertInReadyList,
-                                                     BoUpSLP *SLP) {
+void BoUpSLP::BlockScheduling::calculateDependencies(
+    ScheduleBundle &Bundle, bool InsertInReadyList, BoUpSLP *SLP,
+    ArrayRef<ScheduleData *> ControlDeps) {
   SmallVector<ScheduleEntity *> WorkList;
   auto ProcessNode = [&](ScheduleEntity *SE) {
     if (auto *CD = dyn_cast<ScheduleCopyableData>(SE)) {
@@ -21293,6 +21305,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleBundle &Bundle,
   };
 
   WorkList.push_back(Bundle.getBundle().front());
+  WorkList.append(ControlDeps.begin(), ControlDeps.end());
   SmallPtrSet<ScheduleBundle *, 16> Visited;
   while (!WorkList.empty()) {
     ScheduleEntity *SD = WorkList.pop_back_val();

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/control-dependent-schedule.ll b/llvm/test/Transforms/SLPVectorizer/X86/control-dependent-schedule.ll
new file mode 100644
index 0000000000000..8602c256ad8c2
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/control-dependent-schedule.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i32 @test(i32 %0, i32 %1) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[K:%.*]] = alloca [4 x i32], align 16
+; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[SUB2:%.*]] = add i32 [[ADD1]], -1
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (ptr, ...) @printf(ptr null, i32 [[ADD1]])
+; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[TMP1]], -1
+; CHECK-NEXT:    [[SUB3:%.*]] = add i32 [[ADD2]], [[CALL]]
+; CHECK-NEXT:    [[ADD4:%.*]] = add i32 [[SUB3]], [[TMP0]]
+; CHECK-NEXT:    store i32 [[ADD4]], ptr [[K]], align 16
+; CHECK-NEXT:    [[ARRAYINIT_ELEMENT:%.*]] = getelementptr i8, ptr [[K]], i64 4
+; CHECK-NEXT:    store i32 0, ptr [[ARRAYINIT_ELEMENT]], align 4
+; CHECK-NEXT:    [[ARRAYINIT_ELEMENT5:%.*]] = getelementptr i8, ptr [[K]], i64 8
+; CHECK-NEXT:    [[ADD7:%.*]] = add i32 [[ADD2]], [[SUB2]]
+; CHECK-NEXT:    [[SUB8:%.*]] = add i32 [[ADD7]], [[TMP0]]
+; CHECK-NEXT:    store i32 [[SUB8]], ptr [[ARRAYINIT_ELEMENT5]], align 8
+; CHECK-NEXT:    [[ARRAYINIT_ELEMENT9:%.*]] = getelementptr i8, ptr [[K]], i64 12
+; CHECK-NEXT:    [[ADD13:%.*]] = add i32 [[TMP1]], 1
+; CHECK-NEXT:    [[ADD10:%.*]] = add i32 [[ADD13]], [[TMP0]]
+; CHECK-NEXT:    [[ADD11:%.*]] = add i32 [[ADD10]], [[ADD1]]
+; CHECK-NEXT:    [[ADD12:%.*]] = add i32 [[ADD11]], [[TMP0]]
+; CHECK-NEXT:    store i32 [[ADD12]], ptr [[ARRAYINIT_ELEMENT9]], align 4
+; CHECK-NEXT:    [[CALL15:%.*]] = call i32 (ptr, ...) @printf(ptr null, ptr [[K]])
+; CHECK-NEXT:    ret i32 [[CALL15]]
+;
+entry:
+  %k = alloca [4 x i32], align 16
+  %add1 = add i32 %0, %1
+  %sub2 = add i32 %add1, -1
+  %call = tail call i32 (ptr, ...) @printf(ptr null, i32 %add1)
+  %add2 = add i32 %1, -1
+  %sub3 = add i32 %add2, %call
+  %add4 = add i32 %sub3, %0
+  store i32 %add4, ptr %k, align 16
+  %arrayinit.element = getelementptr i8, ptr %k, i64 4
+  store i32 0, ptr %arrayinit.element, align 4
+  %arrayinit.element5 = getelementptr i8, ptr %k, i64 8
+  %add7 = add i32 %add2, %sub2
+  %sub8 = add i32 %add7, %0
+  store i32 %sub8, ptr %arrayinit.element5, align 8
+  %arrayinit.element9 = getelementptr i8, ptr %k, i64 12
+  %add13 = add i32 %1, 1
+  %add10 = add i32 %add13, %0
+  %add11 = add i32 %add10, %add1
+  %add12 = add i32 %add11, %0
+  store i32 %add12, ptr %arrayinit.element9, align 4
+  %call15 = call i32 (ptr, ...) @printf(ptr null, ptr %k)
+  ret i32 %call15
+}
+
+declare i32 @printf(ptr, ...)


        


More information about the llvm-commits mailing list