[llvm] [SandboxVec][Scheduler] Enforce scheduling SchedBundle instrs back-to-back (PR #128092)

via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 24 17:06:14 PST 2025


https://github.com/vporpo updated https://github.com/llvm/llvm-project/pull/128092

>From b3dac160993f1dc064c221f24a4f7229894e0867 Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vporpodas at google.com>
Date: Thu, 6 Feb 2025 20:47:45 -0800
Subject: [PATCH] [SandboxVec][Scheduler] Enforce scheduling SchedBundle instrs
 back-to-back

This patch fixes the behavior of the scheduler by making sure the instrs that
are part of a SchedBundle are scheduled back-to-back.
---
 .../Vectorize/SandboxVectorizer/Scheduler.h   |  4 +
 .../Vectorize/SandboxVectorizer/Scheduler.cpp | 91 ++++++++++++++-----
 .../SandboxVectorizer/bottomup_basic.ll       |  4 +-
 .../Transforms/SandboxVectorizer/scheduler.ll | 12 +--
 .../SandboxVectorizer/SchedulerTest.cpp       | 47 ++++++++++
 5 files changed, 127 insertions(+), 31 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
index c2bdb40ff96dd..2af3c6d0ea517 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
@@ -150,6 +150,10 @@ class SchedBundle {
   DGNode *getBot() const;
   /// Move all bundle instructions to \p Where back-to-back.
   void cluster(BasicBlock::iterator Where);
+  /// \Returns true if all nodes in the bundle are ready.
+  bool ready() const {
+    return all_of(Nodes, [](const auto *N) { return N->ready(); });
+  }
 #ifndef NDEBUG
   void dump(raw_ostream &OS) const;
   LLVM_DUMP_METHOD void dump() const;
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp
index ad46683d95063..4893d9177eead 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp
@@ -125,36 +125,80 @@ SchedBundle *Scheduler::createBundle(ArrayRef<Instruction *> Instrs) {
 void Scheduler::eraseBundle(SchedBundle *SB) { Bndls.erase(SB); }
 
 bool Scheduler::tryScheduleUntil(ArrayRef<Instruction *> Instrs) {
-  // Use a set of instructions, instead of `Instrs` for fast lookups.
-  DenseSet<Instruction *> InstrsToDefer(Instrs.begin(), Instrs.end());
-  // This collects the nodes that correspond to instructions found in `Instrs`
-  // that have just become ready. These nodes won't be scheduled right away.
-  SmallVector<DGNode *, 8> DeferredNodes;
-
+  // Create a bundle for Instrs. If it turns out the schedule is infeasible we
+  // will dismantle it.
+  auto *InstrsSB = createBundle(Instrs);
   // Keep scheduling ready nodes until we either run out of ready nodes (i.e.,
   // ReadyList is empty), or all nodes that correspond to `Instrs` (the nodes of
   // which are collected in DeferredNodes) are all ready to schedule.
-  while (!ReadyList.empty()) {
-    auto *ReadyN = ReadyList.pop();
-    if (InstrsToDefer.contains(ReadyN->getInstruction())) {
-      // If the ready instruction is one of those in `Instrs`, then we don't
-      // schedule it right away. Instead we defer it until we can schedule it
-      // along with the rest of the instructions in `Instrs`, at the same
-      // time in a single scheduling bundle.
-      DeferredNodes.push_back(ReadyN);
-      bool ReadyToScheduleDeferred = DeferredNodes.size() == Instrs.size();
-      if (ReadyToScheduleDeferred) {
-        scheduleAndUpdateReadyList(*createBundle(Instrs));
+  SmallVector<DGNode *> Retry;
+  bool KeepScheduling = true;
+  while (KeepScheduling) {
+    enum class TryScheduleRes {
+      Success,  ///> We successfully scheduled the bundle.
+      Failure,  ///> We failed to schedule the bundle.
+      Finished, ///> We successfully scheduled the bundle and it is the last
+                /// bundle to be scheduled.
+    };
+    /// TryScheduleNode() attempts to schedule all DAG nodes in the bundle that
+    /// ReadyN is in. If it's not in a bundle it will create a singleton bundle
+    /// and will try to schedule it.
+    auto TryScheduleBndl = [this, InstrsSB](DGNode *ReadyN) -> TryScheduleRes {
+      auto *SB = ReadyN->getSchedBundle();
+      if (SB == nullptr) {
+        // If ReadyN does not belong to a bundle, create a singleton bundle
+        // and schedule it.
+        auto *SingletonSB = createBundle({ReadyN->getInstruction()});
+        scheduleAndUpdateReadyList(*SingletonSB);
+        return TryScheduleRes::Success;
+      }
+      if (SB->ready()) {
+        // Remove the rest of the bundle from the ready list.
+        // TODO: Perhaps change the Scheduler + ReadyList to operate on
+        // SchedBundles instead of DGNodes.
+        for (auto *N : *SB) {
+          if (N != ReadyN)
+            ReadyList.remove(N);
+        }
+        // If all nodes in the bundle are ready.
+        scheduleAndUpdateReadyList(*SB);
+        if (SB == InstrsSB)
+          // We just scheduled InstrsSB bundle, so we are done scheduling.
+          return TryScheduleRes::Finished;
+        return TryScheduleRes::Success;
+      }
+      return TryScheduleRes::Failure;
+    };
+    while (!ReadyList.empty()) {
+      auto *ReadyN = ReadyList.pop();
+      auto Res = TryScheduleBndl(ReadyN);
+      switch (Res) {
+      case TryScheduleRes::Success:
+        // We successfully scheduled ReadyN, keep scheduling.
+        continue;
+      case TryScheduleRes::Failure:
+        // We failed to schedule ReadyN, defer it to later and keep scheduling
+        // other ready instructions.
+        Retry.push_back(ReadyN);
+        continue;
+      case TryScheduleRes::Finished:
+        // We successfully scheduled the instruction bundle, so we are done.
         return true;
       }
-    } else {
-      // If the ready instruction is not found in `Instrs`, then we wrap it in a
-      // scheduling bundle and schedule it right away.
-      scheduleAndUpdateReadyList(*createBundle({ReadyN->getInstruction()}));
+      llvm_unreachable("Unhandled TrySchedule() result");
+    }
+    // Try to schedule nodes from the Retry list.
+    KeepScheduling = false;
+    for (auto *N : make_early_inc_range(Retry)) {
+      auto Res = TryScheduleBndl(N);
+      if (Res == TryScheduleRes::Success) {
+        Retry.erase(find(Retry, N));
+        KeepScheduling = true;
+      }
     }
   }
-  assert(DeferredNodes.size() != Instrs.size() &&
-         "We should have succesfully scheduled and early-returned!");
+
+  eraseBundle(InstrsSB);
   return false;
 }
 
@@ -275,6 +319,7 @@ bool Scheduler::trySchedule(ArrayRef<Instruction *> Instrs) {
     // If one or more instrs are already scheduled we need to destroy the
     // top-most part of the schedule that includes the instrs in the bundle and
     // re-schedule.
+    DAG.extend(Instrs);
     trimSchedule(Instrs);
     ScheduleTopItOpt = std::next(VecUtils::getLowest(Instrs)->getIterator());
     return tryScheduleUntil(Instrs);
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
index 531ed8cb618fc..6baffebd65edc 100644
--- a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
@@ -360,8 +360,8 @@ define void @vecInstrsPlacement(ptr %ptr0) {
 ; CHECK-SAME: ptr [[PTR0:%.*]]) {
 ; CHECK-NEXT:    [[VECL2:%.*]] = load <2 x double>, ptr [[PTR0]], align 8
 ; CHECK-NEXT:    [[VECL:%.*]] = load <2 x double>, ptr [[PTR0]], align 8
-; CHECK-NEXT:    [[VEC2:%.*]] = fmul <2 x double> [[VECL]], [[VECL2]]
-; CHECK-NEXT:    [[VEC:%.*]] = fmul <2 x double> [[VECL]], [[VECL2]]
+; CHECK-NEXT:    [[VEC2:%.*]] = fmul <2 x double> [[VECL2]], [[VECL]]
+; CHECK-NEXT:    [[VEC:%.*]] = fmul <2 x double> [[VECL2]], [[VECL]]
 ; CHECK-NEXT:    [[VEC5:%.*]] = fadd <2 x double> [[VEC]], [[VEC2]]
 ; CHECK-NEXT:    store <2 x double> [[VEC5]], ptr [[PTR0]], align 8
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
index 5b9177ba4b3bf..5306661f13fa6 100644
--- a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
@@ -7,17 +7,17 @@ define void @check_dag_scheduler_update(ptr noalias %p, ptr noalias %p1) {
 ; CHECK-LABEL: define void @check_dag_scheduler_update(
 ; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[P1:%.*]]) {
 ; CHECK-NEXT:    [[I:%.*]] = load i32, ptr [[P]], align 4
-; CHECK-NEXT:    [[VECL:%.*]] = load <4 x i32>, ptr [[P]], align 4
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr i32, ptr [[P]], i64 34
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr i32, ptr [[P]], i64 32
 ; CHECK-NEXT:    [[I2:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr i32, ptr [[P]], i64 33
 ; CHECK-NEXT:    [[I4:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4
-; CHECK-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr i32, ptr [[P]], i64 32
+; CHECK-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr i32, ptr [[P]], i64 34
 ; CHECK-NEXT:    [[I6:%.*]] = load i32, ptr [[ARRAYIDX18]], align 4
 ; CHECK-NEXT:    [[PACK:%.*]] = insertelement <4 x i32> poison, i32 [[I]], i32 0
-; CHECK-NEXT:    [[PACK1:%.*]] = insertelement <4 x i32> [[PACK]], i32 [[I6]], i32 1
+; CHECK-NEXT:    [[PACK1:%.*]] = insertelement <4 x i32> [[PACK]], i32 [[I2]], i32 1
 ; CHECK-NEXT:    [[PACK2:%.*]] = insertelement <4 x i32> [[PACK1]], i32 [[I4]], i32 2
-; CHECK-NEXT:    [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[I2]], i32 3
+; CHECK-NEXT:    [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[I6]], i32 3
+; CHECK-NEXT:    [[VECL:%.*]] = load <4 x i32>, ptr [[P]], align 4
 ; CHECK-NEXT:    [[VEC:%.*]] = add nsw <4 x i32> [[PACK3]], [[VECL]]
 ; CHECK-NEXT:    store <4 x i32> [[VEC]], ptr [[P1]], align 4
 ; CHECK-NEXT:    ret void
@@ -56,9 +56,9 @@ define <4 x float> @check_top_of_schedule(ptr %0) {
 ; CHECK-LABEL: define <4 x float> @check_top_of_schedule(
 ; CHECK-SAME: ptr [[TMP0:%.*]]) {
 ; CHECK-NEXT:    [[INS_1:%.*]] = insertelement <4 x float> zeroinitializer, float poison, i64 0
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr double, ptr [[TMP0]], i64 1
 ; CHECK-NEXT:    [[TRUNC_1:%.*]] = fptrunc double 0.000000e+00 to float
 ; CHECK-NEXT:    [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float [[TRUNC_1]], i64 0
-; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr double, ptr [[TMP0]], i64 1
 ; CHECK-NEXT:    store <2 x double> <double 0.000000e+00, double 1.000000e+00>, ptr [[GEP_1]], align 8
 ; CHECK-NEXT:    ret <4 x float> [[INS_2]]
 ;
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp
index f827bd7424a06..48703dfc72eb8 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp
@@ -324,6 +324,53 @@ define void @foo(ptr noalias %ptr0, ptr noalias %ptr1, i8 %arg) {
   EXPECT_TRUE(Sched.trySchedule({L0, L1}));
 }
 
+// Make sure that instructions in  SchedBundles are always scheduled
+// back-to-back
+TEST_F(SchedulerTest, SchedBundleBackToBack) {
+  parseIR(C, R"IR(
+define void @foo(ptr %ptr, i16 %arg) {
+  %gep0 = getelementptr i32, ptr %ptr, i64 0
+  %gep1 = getelementptr i32, ptr %ptr, i64 1
+  %zextX = zext i16 0 to i32
+  %zext1 = zext i16 0 to i32
+  %zext0 = zext i16 %arg to i32
+  %shl1 = shl i32 %zextX, 0
+  %shl0 = shl i32 %zext1, 0
+  %sub1 = sub i32 %zext1, %shl1
+  %sub0 = sub i32 %zext0, %shl0
+  store i32 %sub1, ptr %gep1
+  store i32 %sub0, ptr %gep0
+  ret void
+})IR");
+  llvm::Function *LLVMF = &*M->getFunction("foo");
+  sandboxir::Context Ctx(C);
+  auto *F = Ctx.createFunction(LLVMF);
+  auto *BB = &*F->begin();
+  auto It = BB->begin();
+  auto *Gep0 = cast<sandboxir::GetElementPtrInst>(&*It++);
+  auto *Gep1 = cast<sandboxir::GetElementPtrInst>(&*It++);
+  auto *ZextX = cast<sandboxir::CastInst>(&*It++);
+  auto *Zext1 = cast<sandboxir::CastInst>(&*It++);
+  auto *Zext0 = cast<sandboxir::CastInst>(&*It++);
+  auto *Shl1 = cast<sandboxir::BinaryOperator>(&*It++);
+  auto *Shl0 = cast<sandboxir::BinaryOperator>(&*It++);
+  auto *Sub1 = cast<sandboxir::BinaryOperator>(&*It++);
+  auto *Sub0 = cast<sandboxir::BinaryOperator>(&*It++);
+  auto *S0 = cast<sandboxir::StoreInst>(&*It++);
+  auto *S1 = cast<sandboxir::StoreInst>(&*It++);
+
+  sandboxir::Scheduler Sched(getAA(*LLVMF), Ctx);
+  EXPECT_TRUE(Sched.trySchedule({S0, S1}));
+  EXPECT_TRUE(Sched.trySchedule({Zext0, Zext1}));
+  EXPECT_TRUE(Sched.trySchedule({Shl0, Shl1}));
+  auto BackToBack = [](sandboxir::Instruction *I1, sandboxir::Instruction *I2) {
+    return I1->getNextNode() == I2 || I2->getNextNode() == I1;
+  };
+  EXPECT_TRUE(BackToBack(S0, S1));
+  EXPECT_TRUE(BackToBack(Zext0, Zext1));
+  EXPECT_TRUE(BackToBack(Shl0, Shl1));
+}
+
 // Test that an instruction can't belong in two bundles!
 TEST_F(SchedulerTest, CheckBundles) {
   parseIR(C, R"IR(



More information about the llvm-commits mailing list