[llvm] [SandboxVec][Scheduler] Enforce scheduling SchedBundle instrs back-to-back (PR #128092)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 24 17:06:14 PST 2025
https://github.com/vporpo updated https://github.com/llvm/llvm-project/pull/128092
>From b3dac160993f1dc064c221f24a4f7229894e0867 Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vporpodas at google.com>
Date: Thu, 6 Feb 2025 20:47:45 -0800
Subject: [PATCH] [SandboxVec][Scheduler] Enforce scheduling SchedBundle instrs
back-to-back
This patch fixes the behavior of the scheduler by making sure the instrs that
are part of a SchedBundle are scheduled back-to-back.
---
.../Vectorize/SandboxVectorizer/Scheduler.h | 4 +
.../Vectorize/SandboxVectorizer/Scheduler.cpp | 91 ++++++++++++++-----
.../SandboxVectorizer/bottomup_basic.ll | 4 +-
.../Transforms/SandboxVectorizer/scheduler.ll | 12 +--
.../SandboxVectorizer/SchedulerTest.cpp | 47 ++++++++++
5 files changed, 127 insertions(+), 31 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
index c2bdb40ff96dd..2af3c6d0ea517 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
@@ -150,6 +150,10 @@ class SchedBundle {
DGNode *getBot() const;
/// Move all bundle instructions to \p Where back-to-back.
void cluster(BasicBlock::iterator Where);
+ /// \Returns true if all nodes in the bundle are ready.
+ bool ready() const {
+ return all_of(Nodes, [](const auto *N) { return N->ready(); });
+ }
#ifndef NDEBUG
void dump(raw_ostream &OS) const;
LLVM_DUMP_METHOD void dump() const;
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp
index ad46683d95063..4893d9177eead 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp
@@ -125,36 +125,80 @@ SchedBundle *Scheduler::createBundle(ArrayRef<Instruction *> Instrs) {
void Scheduler::eraseBundle(SchedBundle *SB) { Bndls.erase(SB); }
bool Scheduler::tryScheduleUntil(ArrayRef<Instruction *> Instrs) {
- // Use a set of instructions, instead of `Instrs` for fast lookups.
- DenseSet<Instruction *> InstrsToDefer(Instrs.begin(), Instrs.end());
- // This collects the nodes that correspond to instructions found in `Instrs`
- // that have just become ready. These nodes won't be scheduled right away.
- SmallVector<DGNode *, 8> DeferredNodes;
-
+ // Create a bundle for Instrs. If it turns out the schedule is infeasible we
+ // will dismantle it.
+ auto *InstrsSB = createBundle(Instrs);
// Keep scheduling ready nodes until we either run out of ready nodes (i.e.,
// ReadyList is empty), or all nodes that correspond to `Instrs` (the nodes of
// which are collected in DeferredNodes) are all ready to schedule.
- while (!ReadyList.empty()) {
- auto *ReadyN = ReadyList.pop();
- if (InstrsToDefer.contains(ReadyN->getInstruction())) {
- // If the ready instruction is one of those in `Instrs`, then we don't
- // schedule it right away. Instead we defer it until we can schedule it
- // along with the rest of the instructions in `Instrs`, at the same
- // time in a single scheduling bundle.
- DeferredNodes.push_back(ReadyN);
- bool ReadyToScheduleDeferred = DeferredNodes.size() == Instrs.size();
- if (ReadyToScheduleDeferred) {
- scheduleAndUpdateReadyList(*createBundle(Instrs));
+ SmallVector<DGNode *> Retry;
+ bool KeepScheduling = true;
+ while (KeepScheduling) {
+ enum class TryScheduleRes {
+ Success, ///> We successfully scheduled the bundle.
+ Failure, ///> We failed to schedule the bundle.
+ Finished, ///> We successfully scheduled the bundle and it is the last
+ /// bundle to be scheduled.
+ };
+ /// TryScheduleNode() attempts to schedule all DAG nodes in the bundle that
+ /// ReadyN is in. If it's not in a bundle it will create a singleton bundle
+ /// and will try to schedule it.
+ auto TryScheduleBndl = [this, InstrsSB](DGNode *ReadyN) -> TryScheduleRes {
+ auto *SB = ReadyN->getSchedBundle();
+ if (SB == nullptr) {
+ // If ReadyN does not belong to a bundle, create a singleton bundle
+ // and schedule it.
+ auto *SingletonSB = createBundle({ReadyN->getInstruction()});
+ scheduleAndUpdateReadyList(*SingletonSB);
+ return TryScheduleRes::Success;
+ }
+ if (SB->ready()) {
+ // Remove the rest of the bundle from the ready list.
+ // TODO: Perhaps change the Scheduler + ReadyList to operate on
+ // SchedBundles instead of DGNodes.
+ for (auto *N : *SB) {
+ if (N != ReadyN)
+ ReadyList.remove(N);
+ }
+ // If all nodes in the bundle are ready.
+ scheduleAndUpdateReadyList(*SB);
+ if (SB == InstrsSB)
+ // We just scheduled InstrsSB bundle, so we are done scheduling.
+ return TryScheduleRes::Finished;
+ return TryScheduleRes::Success;
+ }
+ return TryScheduleRes::Failure;
+ };
+ while (!ReadyList.empty()) {
+ auto *ReadyN = ReadyList.pop();
+ auto Res = TryScheduleBndl(ReadyN);
+ switch (Res) {
+ case TryScheduleRes::Success:
+ // We successfully scheduled ReadyN, keep scheduling.
+ continue;
+ case TryScheduleRes::Failure:
+ // We failed to schedule ReadyN, defer it to later and keep scheduling
+ // other ready instructions.
+ Retry.push_back(ReadyN);
+ continue;
+ case TryScheduleRes::Finished:
+ // We successfully scheduled the instruction bundle, so we are done.
return true;
}
- } else {
- // If the ready instruction is not found in `Instrs`, then we wrap it in a
- // scheduling bundle and schedule it right away.
- scheduleAndUpdateReadyList(*createBundle({ReadyN->getInstruction()}));
+ llvm_unreachable("Unhandled TrySchedule() result");
+ }
+ // Try to schedule nodes from the Retry list.
+ KeepScheduling = false;
+ for (auto *N : make_early_inc_range(Retry)) {
+ auto Res = TryScheduleBndl(N);
+ if (Res == TryScheduleRes::Success) {
+ Retry.erase(find(Retry, N));
+ KeepScheduling = true;
+ }
}
}
- assert(DeferredNodes.size() != Instrs.size() &&
- "We should have succesfully scheduled and early-returned!");
+
+ eraseBundle(InstrsSB);
return false;
}
@@ -275,6 +319,7 @@ bool Scheduler::trySchedule(ArrayRef<Instruction *> Instrs) {
// If one or more instrs are already scheduled we need to destroy the
// top-most part of the schedule that includes the instrs in the bundle and
// re-schedule.
+ DAG.extend(Instrs);
trimSchedule(Instrs);
ScheduleTopItOpt = std::next(VecUtils::getLowest(Instrs)->getIterator());
return tryScheduleUntil(Instrs);
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
index 531ed8cb618fc..6baffebd65edc 100644
--- a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
@@ -360,8 +360,8 @@ define void @vecInstrsPlacement(ptr %ptr0) {
; CHECK-SAME: ptr [[PTR0:%.*]]) {
; CHECK-NEXT: [[VECL2:%.*]] = load <2 x double>, ptr [[PTR0]], align 8
; CHECK-NEXT: [[VECL:%.*]] = load <2 x double>, ptr [[PTR0]], align 8
-; CHECK-NEXT: [[VEC2:%.*]] = fmul <2 x double> [[VECL]], [[VECL2]]
-; CHECK-NEXT: [[VEC:%.*]] = fmul <2 x double> [[VECL]], [[VECL2]]
+; CHECK-NEXT: [[VEC2:%.*]] = fmul <2 x double> [[VECL2]], [[VECL]]
+; CHECK-NEXT: [[VEC:%.*]] = fmul <2 x double> [[VECL2]], [[VECL]]
; CHECK-NEXT: [[VEC5:%.*]] = fadd <2 x double> [[VEC]], [[VEC2]]
; CHECK-NEXT: store <2 x double> [[VEC5]], ptr [[PTR0]], align 8
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
index 5b9177ba4b3bf..5306661f13fa6 100644
--- a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
@@ -7,17 +7,17 @@ define void @check_dag_scheduler_update(ptr noalias %p, ptr noalias %p1) {
; CHECK-LABEL: define void @check_dag_scheduler_update(
; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[P1:%.*]]) {
; CHECK-NEXT: [[I:%.*]] = load i32, ptr [[P]], align 4
-; CHECK-NEXT: [[VECL:%.*]] = load <4 x i32>, ptr [[P]], align 4
-; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr i32, ptr [[P]], i64 34
+; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr i32, ptr [[P]], i64 32
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr i32, ptr [[P]], i64 33
; CHECK-NEXT: [[I4:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4
-; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr i32, ptr [[P]], i64 32
+; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr i32, ptr [[P]], i64 34
; CHECK-NEXT: [[I6:%.*]] = load i32, ptr [[ARRAYIDX18]], align 4
; CHECK-NEXT: [[PACK:%.*]] = insertelement <4 x i32> poison, i32 [[I]], i32 0
-; CHECK-NEXT: [[PACK1:%.*]] = insertelement <4 x i32> [[PACK]], i32 [[I6]], i32 1
+; CHECK-NEXT: [[PACK1:%.*]] = insertelement <4 x i32> [[PACK]], i32 [[I2]], i32 1
; CHECK-NEXT: [[PACK2:%.*]] = insertelement <4 x i32> [[PACK1]], i32 [[I4]], i32 2
-; CHECK-NEXT: [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[I2]], i32 3
+; CHECK-NEXT: [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[I6]], i32 3
+; CHECK-NEXT: [[VECL:%.*]] = load <4 x i32>, ptr [[P]], align 4
; CHECK-NEXT: [[VEC:%.*]] = add nsw <4 x i32> [[PACK3]], [[VECL]]
; CHECK-NEXT: store <4 x i32> [[VEC]], ptr [[P1]], align 4
; CHECK-NEXT: ret void
@@ -56,9 +56,9 @@ define <4 x float> @check_top_of_schedule(ptr %0) {
; CHECK-LABEL: define <4 x float> @check_top_of_schedule(
; CHECK-SAME: ptr [[TMP0:%.*]]) {
; CHECK-NEXT: [[INS_1:%.*]] = insertelement <4 x float> zeroinitializer, float poison, i64 0
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr double, ptr [[TMP0]], i64 1
; CHECK-NEXT: [[TRUNC_1:%.*]] = fptrunc double 0.000000e+00 to float
; CHECK-NEXT: [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float [[TRUNC_1]], i64 0
-; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr double, ptr [[TMP0]], i64 1
; CHECK-NEXT: store <2 x double> <double 0.000000e+00, double 1.000000e+00>, ptr [[GEP_1]], align 8
; CHECK-NEXT: ret <4 x float> [[INS_2]]
;
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp
index f827bd7424a06..48703dfc72eb8 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp
@@ -324,6 +324,53 @@ define void @foo(ptr noalias %ptr0, ptr noalias %ptr1, i8 %arg) {
EXPECT_TRUE(Sched.trySchedule({L0, L1}));
}
+// Make sure that instructions in SchedBundles are always scheduled
+// back-to-back
+TEST_F(SchedulerTest, SchedBundleBackToBack) {
+ parseIR(C, R"IR(
+define void @foo(ptr %ptr, i16 %arg) {
+ %gep0 = getelementptr i32, ptr %ptr, i64 0
+ %gep1 = getelementptr i32, ptr %ptr, i64 1
+ %zextX = zext i16 0 to i32
+ %zext1 = zext i16 0 to i32
+ %zext0 = zext i16 %arg to i32
+ %shl1 = shl i32 %zextX, 0
+ %shl0 = shl i32 %zext1, 0
+ %sub1 = sub i32 %zext1, %shl1
+ %sub0 = sub i32 %zext0, %shl0
+ store i32 %sub1, ptr %gep1
+ store i32 %sub0, ptr %gep0
+ ret void
+})IR");
+ llvm::Function *LLVMF = &*M->getFunction("foo");
+ sandboxir::Context Ctx(C);
+ auto *F = Ctx.createFunction(LLVMF);
+ auto *BB = &*F->begin();
+ auto It = BB->begin();
+ auto *Gep0 = cast<sandboxir::GetElementPtrInst>(&*It++);
+ auto *Gep1 = cast<sandboxir::GetElementPtrInst>(&*It++);
+ auto *ZextX = cast<sandboxir::CastInst>(&*It++);
+ auto *Zext1 = cast<sandboxir::CastInst>(&*It++);
+ auto *Zext0 = cast<sandboxir::CastInst>(&*It++);
+ auto *Shl1 = cast<sandboxir::BinaryOperator>(&*It++);
+ auto *Shl0 = cast<sandboxir::BinaryOperator>(&*It++);
+ auto *Sub1 = cast<sandboxir::BinaryOperator>(&*It++);
+ auto *Sub0 = cast<sandboxir::BinaryOperator>(&*It++);
+ auto *S0 = cast<sandboxir::StoreInst>(&*It++);
+ auto *S1 = cast<sandboxir::StoreInst>(&*It++);
+
+ sandboxir::Scheduler Sched(getAA(*LLVMF), Ctx);
+ EXPECT_TRUE(Sched.trySchedule({S0, S1}));
+ EXPECT_TRUE(Sched.trySchedule({Zext0, Zext1}));
+ EXPECT_TRUE(Sched.trySchedule({Shl0, Shl1}));
+ auto BackToBack = [](sandboxir::Instruction *I1, sandboxir::Instruction *I2) {
+ return I1->getNextNode() == I2 || I2->getNextNode() == I1;
+ };
+ EXPECT_TRUE(BackToBack(S0, S1));
+ EXPECT_TRUE(BackToBack(Zext0, Zext1));
+ EXPECT_TRUE(BackToBack(Shl0, Shl1));
+}
+
// Test that an instruction can't belong in two bundles!
TEST_F(SchedulerTest, CheckBundles) {
parseIR(C, R"IR(
More information about the llvm-commits
mailing list