[llvm] dd5ba69 - [SLP]Recalculate deps for potential control-dependent schedule data
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 13 08:18:36 PDT 2025
Author: Alexey Bataev
Date: 2025-08-13T08:18:26-07:00
New Revision: dd5ba694bde6c92259c518b9eb2888163a812433
URL: https://github.com/llvm/llvm-project/commit/dd5ba694bde6c92259c518b9eb2888163a812433
DIFF: https://github.com/llvm/llvm-project/commit/dd5ba694bde6c92259c518b9eb2888163a812433.diff
LOG: [SLP]Recalculate deps for potential control-dependent schedule data
After clearing the dependencies in copyable data, the dependencies of the
original ScheduleData must be recalculated if it can be marked as
control dependent.
Fixes #153289
Added:
llvm/test/Transforms/SLPVectorizer/X86/control-dependent-schedule.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 98011174bd703..a3cb4d138789c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5689,7 +5689,8 @@ class BoUpSLP {
/// Updates the dependency information of a bundle and of all instructions/
/// bundles which depend on the original bundle.
void calculateDependencies(ScheduleBundle &Bundle, bool InsertInReadyList,
- BoUpSLP *SLP);
+ BoUpSLP *SLP,
+ ArrayRef<ScheduleData *> ControlDeps = {});
/// Sets all instruction in the scheduling region to un-scheduled.
void resetSchedule();
@@ -20727,15 +20728,21 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
LLVM_DEBUG(dbgs() << "SLP: bundle: " << *S.getMainOp() << "\n");
auto TryScheduleBundleImpl = [=](bool ReSchedule, ScheduleBundle &Bundle) {
- // Clear deps or reculate the region, if the memory instruction is a
- // copyable. It may have memory deps, which must be reaculated.
+ // Clear deps or recalculate the region, if the memory instruction is a
+ // copyable. It may have memory deps, which must be recalculated.
+ SmallVector<ScheduleData *> ControlDependentMembers;
auto CheckIfNeedToClearDeps = [&](ScheduleBundle &Bundle) {
SmallDenseMap<std::pair<Instruction *, Value *>, unsigned> UserOpToNumOps;
for (ScheduleEntity *SE : Bundle.getBundle()) {
if (ScheduleCopyableData *SD = dyn_cast<ScheduleCopyableData>(SE)) {
if (ScheduleData *BundleMember = getScheduleData(SD->getInst());
- BundleMember && BundleMember->hasValidDependencies())
+ BundleMember && BundleMember->hasValidDependencies()) {
BundleMember->clearDirectDependencies();
+ if (RegionHasStackSave ||
+ !isGuaranteedToTransferExecutionToSuccessor(
+ BundleMember->getInst()))
+ ControlDependentMembers.push_back(BundleMember);
+ }
continue;
}
auto *SD = cast<ScheduleData>(SE);
@@ -20748,8 +20755,12 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
if (auto *Op = dyn_cast<Instruction>(U.get());
Op && areAllOperandsReplacedByCopyableData(SD->getInst(), Op,
*SLP, NumOps)) {
- if (ScheduleData *OpSD = getScheduleData(Op))
+ if (ScheduleData *OpSD = getScheduleData(Op)) {
OpSD->clearDirectDependencies();
+ if (RegionHasStackSave ||
+ !isGuaranteedToTransferExecutionToSuccessor(OpSD->getInst()))
+ ControlDependentMembers.push_back(OpSD);
+ }
}
}
}
@@ -20783,7 +20794,8 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
CheckIfNeedToClearDeps(Bundle);
LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << Bundle << " in block "
<< BB->getName() << "\n");
- calculateDependencies(Bundle, /*InsertInReadyList=*/!ReSchedule, SLP);
+ calculateDependencies(Bundle, /*InsertInReadyList=*/!ReSchedule, SLP,
+ ControlDependentMembers);
}
if (ReSchedule) {
@@ -21048,9 +21060,9 @@ void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
}
}
-void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleBundle &Bundle,
- bool InsertInReadyList,
- BoUpSLP *SLP) {
+void BoUpSLP::BlockScheduling::calculateDependencies(
+ ScheduleBundle &Bundle, bool InsertInReadyList, BoUpSLP *SLP,
+ ArrayRef<ScheduleData *> ControlDeps) {
SmallVector<ScheduleEntity *> WorkList;
auto ProcessNode = [&](ScheduleEntity *SE) {
if (auto *CD = dyn_cast<ScheduleCopyableData>(SE)) {
@@ -21293,6 +21305,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleBundle &Bundle,
};
WorkList.push_back(Bundle.getBundle().front());
+ WorkList.append(ControlDeps.begin(), ControlDeps.end());
SmallPtrSet<ScheduleBundle *, 16> Visited;
while (!WorkList.empty()) {
ScheduleEntity *SD = WorkList.pop_back_val();
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/control-dependent-schedule.ll b/llvm/test/Transforms/SLPVectorizer/X86/control-dependent-schedule.ll
new file mode 100644
index 0000000000000..8602c256ad8c2
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/control-dependent-schedule.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i32 @test(i32 %0, i32 %1) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[K:%.*]] = alloca [4 x i32], align 16
+; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[SUB2:%.*]] = add i32 [[ADD1]], -1
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (ptr, ...) @printf(ptr null, i32 [[ADD1]])
+; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[TMP1]], -1
+; CHECK-NEXT: [[SUB3:%.*]] = add i32 [[ADD2]], [[CALL]]
+; CHECK-NEXT: [[ADD4:%.*]] = add i32 [[SUB3]], [[TMP0]]
+; CHECK-NEXT: store i32 [[ADD4]], ptr [[K]], align 16
+; CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr i8, ptr [[K]], i64 4
+; CHECK-NEXT: store i32 0, ptr [[ARRAYINIT_ELEMENT]], align 4
+; CHECK-NEXT: [[ARRAYINIT_ELEMENT5:%.*]] = getelementptr i8, ptr [[K]], i64 8
+; CHECK-NEXT: [[ADD7:%.*]] = add i32 [[ADD2]], [[SUB2]]
+; CHECK-NEXT: [[SUB8:%.*]] = add i32 [[ADD7]], [[TMP0]]
+; CHECK-NEXT: store i32 [[SUB8]], ptr [[ARRAYINIT_ELEMENT5]], align 8
+; CHECK-NEXT: [[ARRAYINIT_ELEMENT9:%.*]] = getelementptr i8, ptr [[K]], i64 12
+; CHECK-NEXT: [[ADD13:%.*]] = add i32 [[TMP1]], 1
+; CHECK-NEXT: [[ADD10:%.*]] = add i32 [[ADD13]], [[TMP0]]
+; CHECK-NEXT: [[ADD11:%.*]] = add i32 [[ADD10]], [[ADD1]]
+; CHECK-NEXT: [[ADD12:%.*]] = add i32 [[ADD11]], [[TMP0]]
+; CHECK-NEXT: store i32 [[ADD12]], ptr [[ARRAYINIT_ELEMENT9]], align 4
+; CHECK-NEXT: [[CALL15:%.*]] = call i32 (ptr, ...) @printf(ptr null, ptr [[K]])
+; CHECK-NEXT: ret i32 [[CALL15]]
+;
+entry:
+ %k = alloca [4 x i32], align 16
+ %add1 = add i32 %0, %1
+ %sub2 = add i32 %add1, -1
+ %call = tail call i32 (ptr, ...) @printf(ptr null, i32 %add1)
+ %add2 = add i32 %1, -1
+ %sub3 = add i32 %add2, %call
+ %add4 = add i32 %sub3, %0
+ store i32 %add4, ptr %k, align 16
+ %arrayinit.element = getelementptr i8, ptr %k, i64 4
+ store i32 0, ptr %arrayinit.element, align 4
+ %arrayinit.element5 = getelementptr i8, ptr %k, i64 8
+ %add7 = add i32 %add2, %sub2
+ %sub8 = add i32 %add7, %0
+ store i32 %sub8, ptr %arrayinit.element5, align 8
+ %arrayinit.element9 = getelementptr i8, ptr %k, i64 12
+ %add13 = add i32 %1, 1
+ %add10 = add i32 %add13, %0
+ %add11 = add i32 %add10, %add1
+ %add12 = add i32 %add11, %0
+ store i32 %add12, ptr %arrayinit.element9, align 4
+ %call15 = call i32 (ptr, ...) @printf(ptr null, ptr %k)
+ ret i32 %call15
+}
+
+declare i32 @printf(ptr, ...)
More information about the llvm-commits mailing list