[llvm] [AMDGPU] AMDGPUIGroupLP: Avoid repeating reachability checks in greedy algorithm (PR #182463)

Fri Feb 20 04:16:22 PST 2026

https://github.com/frederik-h updated https://github.com/llvm/llvm-project/pull/182463

>From 03ca583cdd87d392262d086026d9ddb6ea7ce312 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath at amd.com>
Date: Tue, 17 Feb 2026 03:23:53 -0500
Subject: [PATCH 1/3] [AMDGPU] AMDGPUIGroupLP: Avoid repeating reachability
 checks in greedy algorithm

In the greedy pipeline solver, the group cost is found using the
addEdges function and the edges must be removed from the DAG after
processing each group. The best group edges are then re-inserted using
the same function. This repeats the costly reachability checks which
become problematic for pipelines with many SchedGroups.

The algorithm is changed to remember the best group edges instead of
recomputing them.  Additionally, SchedGroup::tryAddEdge is refactored
to avoid a redundant cycle check, which is already performed by
DAG->addEdge.
---
 llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp | 39 ++++++++++++++++-------
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index 10ffbe281beac..15574d6686cba 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -115,6 +115,13 @@ class InstructionRule {
 
 using SUnitsToCandidateSGsMap = DenseMap<SUnit *, SmallVector<int, 4>>;
 
+namespace {
+/// Try to add and edge from SU \p A to SU \p B to the \p DAG.
+bool tryAddEdge(ScheduleDAGInstrs *DAG, SUnit *A, SUnit *B) {
+  return A != B && DAG->addEdge(B, SDep(A, SDep::Artificial));
+}
+} // namespace
+
 // Classify instructions into groups to enable fine tuned control over the
 // scheduler. These groups may be more specific than current SchedModel
 // instruction classes.
@@ -702,6 +709,7 @@ void PipelineSolver::greedyFind(
   int TempCost;
   SchedGroup *BestGroup = nullptr;
   int BestGroupID = -1;
+  std::vector<std::pair<SUnit *, SUnit *>> BestEdges;
   auto &SyncPipeline = CurrPipeline[CurrSyncGroupIdx];
   LLVM_DEBUG(dbgs() << "Fitting SU(" << CurrSU.first->NodeNum
                     << ") in Pipeline # " << CurrSyncGroupIdx << "\n");
@@ -711,7 +719,6 @@ void PipelineSolver::greedyFind(
   // first. If we fail to do this for the greedy algorithm, the solution will
   // likely not be good in more complex cases.
   for (; I != E; ++I) {
-    std::vector<std::pair<SUnit *, SUnit *>> AddedEdges;
     int CandSGID = *I;
     SchedGroup *Match = llvm::find_if(SyncPipeline, [CandSGID](SchedGroup &SG) {
       return SG.getSGID() == CandSGID;
@@ -729,21 +736,35 @@ void PipelineSolver::greedyFind(
       LLVM_DEBUG(dbgs() << "SGID # " << CandSGID << " has conflicting rule\n");
       continue;
     }
-    TempCost = addEdges(SyncPipeline, CurrSU.first, CandSGID, AddedEdges);
+
+    std::vector<std::pair<SUnit *, SUnit *>> TempEdges;
+    TempCost = addEdges(SyncPipeline, CurrSU.first, CandSGID, TempEdges);
     LLVM_DEBUG(dbgs() << "Cost of Group " << TempCost << "\n");
+
     if (TempCost < BestNodeCost || BestNodeCost == -1) {
+      BestEdges = TempEdges;
       BestGroup = Match;
       BestNodeCost = TempCost;
       BestGroupID = CandSGID;
+
+      if (BestNodeCost == 0)
+        break;
+
+      removeEdges(BestEdges);
     }
-    removeEdges(AddedEdges);
-    if (BestNodeCost == 0)
-      break;
+
+    removeEdges(TempEdges);
   }
 
   if (BestGroupID != -1) {
     BestGroup->add(*CurrSU.first);
-    addEdges(SyncPipeline, CurrSU.first, BestGroupID, AddedEdges);
+
+    for (auto &E : BestEdges) {
+      AddedEdges.push_back(E);
+      [[maybe_unused]] bool Added = tryAddEdge(DAG, E.first, E.second);
+      assert(Added && "Edges known to be insertable.");
+    }
+
     LLVM_DEBUG(dbgs() << "Best Group has ID: " << BestGroupID << " and Mask"
                       << (int)BestGroup->getMask() << "\n");
     BestCost += TempCost;
@@ -2381,11 +2402,7 @@ class IGroupLPDAGMutation : public ScheduleDAGMutation {
 unsigned SchedGroup::NumSchedGroups = 0;
 
 bool SchedGroup::tryAddEdge(SUnit *A, SUnit *B) {
-  if (A != B && DAG->canAddEdge(B, A)) {
-    DAG->addEdge(B, SDep(A, SDep::Artificial));
-    return true;
-  }
-  return false;
+  return ::tryAddEdge(DAG, A, B);
 }
 
 bool SchedGroup::canAddMI(const MachineInstr &MI) const {

>From 0590c7d65b86e1c0d6e108b936af5dd4b679d036 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath at amd.com>
Date: Fri, 20 Feb 2026 05:40:52 -0500
Subject: [PATCH 2/3] Review changes

---
 llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index 15574d6686cba..830642b1d7981 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -115,12 +115,10 @@ class InstructionRule {
 
 using SUnitsToCandidateSGsMap = DenseMap<SUnit *, SmallVector<int, 4>>;
 
-namespace {
 /// Try to add and edge from SU \p A to SU \p B to the \p DAG.
-bool tryAddEdge(ScheduleDAGInstrs *DAG, SUnit *A, SUnit *B) {
+static bool tryAddEdge(ScheduleDAGInstrs *DAG, SUnit *A, SUnit *B) {
   return A != B && DAG->addEdge(B, SDep(A, SDep::Artificial));
 }
-} // namespace
 
 // Classify instructions into groups to enable fine tuned control over the
 // scheduler. These groups may be more specific than current SchedModel
@@ -761,8 +759,8 @@ void PipelineSolver::greedyFind(
 
     for (auto &E : BestEdges) {
       AddedEdges.push_back(E);
-      [[maybe_unused]] bool Added = tryAddEdge(DAG, E.first, E.second);
-      assert(Added && "Edges known to be insertable.");
+      if (!tryAddEdge(DAG, E.first, E.second))
+        llvm_unreachable("Edges known to be insertable.");
     }
 
     LLVM_DEBUG(dbgs() << "Best Group has ID: " << BestGroupID << " and Mask"

>From 95564fa01682537ea4b9e130b65c428668aebe72 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath at amd.com>
Date: Fri, 20 Feb 2026 07:13:50 -0500
Subject: [PATCH 3/3] Use list instead of vector for AddedEdges

---
 llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp | 42 +++++++++++------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index 830642b1d7981..b7100a659c952 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -171,7 +171,7 @@ class SchedGroup {
   // Add DAG dependencies and track which edges are added, and the count of
   // missed edges
   int link(SUnit &SU, bool MakePred,
-           std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges);
+           std::list<std::pair<SUnit *, SUnit *>> &AddedEdges);
 
   // Add DAG dependencies from all SUnits in this SchedGroup and this SU.
   // Use the predicate to determine whether SU should be a predecessor (P =
@@ -312,7 +312,7 @@ class PipelineSolver {
   // current information. One step in the greedy algorithm. Templated against
   // the SchedGroup iterator (either reverse or forward).
   template <typename T>
-  void greedyFind(std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges, T I,
+  void greedyFind(std::list<std::pair<SUnit *, SUnit *>> &AddedEdges, T I,
                   T E);
   // Whether or not the current solution is optimal
   bool checkOptimal();
@@ -329,15 +329,15 @@ class PipelineSolver {
   // Add the edges from the SU to the other SchedGroups in pipeline, and
   // return the number of edges missed.
   int addEdges(SmallVectorImpl<SchedGroup> &SyncPipeline, SUnit *SU, int SGID,
-               std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges);
+               std::list<std::pair<SUnit *, SUnit *>> &AddedEdges);
   /// Link the pipeline as if \p SU was in the SchedGroup with ID \p SGID. It
   /// returns the cost (in terms of missed pipeline edges), and tracks the edges
   /// added in \p AddedEdges
   template <typename T>
   int linkSUnit(SUnit *SU, int SGID,
-                std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges, T I, T E);
+                std::list<std::pair<SUnit *, SUnit *>> &AddedEdges, T I, T E);
   /// Remove the edges passed via \p AddedEdges
-  void removeEdges(const std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges);
+  void removeEdges(const std::list<std::pair<SUnit *, SUnit *>> &AddedEdges);
   // Convert the passed in maps to arrays for bidirectional iterators
   void convertSyncMapsToArrays();
 
@@ -461,7 +461,7 @@ void PipelineSolver::makePipeline() {
 
 template <typename T>
 int PipelineSolver::linkSUnit(
-    SUnit *SU, int SGID, std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges,
+    SUnit *SU, int SGID, std::list<std::pair<SUnit *, SUnit *>> &AddedEdges,
     T I, T E) {
   bool MakePred = false;
   int AddedCost = 0;
@@ -479,7 +479,7 @@ int PipelineSolver::linkSUnit(
 
 int PipelineSolver::addEdges(
     SmallVectorImpl<SchedGroup> &SyncPipeline, SUnit *SU, int SGID,
-    std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges) {
+    std::list<std::pair<SUnit *, SUnit *>> &AddedEdges) {
 
   // For IsBottomUp, the first SchedGroup in SyncPipeline contains the
   // instructions that are the ultimate successors in the resultant mutation.
@@ -496,7 +496,7 @@ int PipelineSolver::addEdges(
 }
 
 void PipelineSolver::removeEdges(
-    const std::vector<std::pair<SUnit *, SUnit *>> &EdgesToRemove) {
+    const std::list<std::pair<SUnit *, SUnit *>> &EdgesToRemove) {
   // Only remove the edges that we have added when testing
   // the fit.
   for (auto &PredSuccPair : EdgesToRemove) {
@@ -575,7 +575,7 @@ void PipelineSolver::populateReadyList(
   assert(CurrSU.second.size() >= 1);
 
   for (; I != E; ++I) {
-    std::vector<std::pair<SUnit *, SUnit *>> AddedEdges;
+    std::list<std::pair<SUnit *, SUnit *>> AddedEdges;
     int CandSGID = *I;
     SchedGroup *Match = llvm::find_if(SyncPipeline, [CandSGID](SchedGroup &SG) {
       return SG.getSGID() == CandSGID;
@@ -634,7 +634,7 @@ bool PipelineSolver::solveExact() {
 
     int CandSGID = I->first;
     int AddedCost = 0;
-    std::vector<std::pair<SUnit *, SUnit *>> AddedEdges;
+    std::list<std::pair<SUnit *, SUnit *>> AddedEdges;
     auto &SyncPipeline = CurrPipeline[CurrSyncGroupIdx];
     SchedGroup *Match;
     for (auto &SG : SyncPipeline) {
@@ -701,13 +701,13 @@ bool PipelineSolver::solveExact() {
 
 template <typename T>
 void PipelineSolver::greedyFind(
-    std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges, T I, T E) {
+    std::list<std::pair<SUnit *, SUnit *>> &AddedEdges, T I, T E) {
   SUToCandSGsPair CurrSU = PipelineInstrs[CurrSyncGroupIdx][CurrConflInstNo];
   int BestNodeCost = -1;
   int TempCost;
   SchedGroup *BestGroup = nullptr;
   int BestGroupID = -1;
-  std::vector<std::pair<SUnit *, SUnit *>> BestEdges;
+  std::list<std::pair<SUnit *, SUnit *>> BestEdges;
   auto &SyncPipeline = CurrPipeline[CurrSyncGroupIdx];
   LLVM_DEBUG(dbgs() << "Fitting SU(" << CurrSU.first->NodeNum
                     << ") in Pipeline # " << CurrSyncGroupIdx << "\n");
@@ -735,7 +735,7 @@ void PipelineSolver::greedyFind(
       continue;
     }
 
-    std::vector<std::pair<SUnit *, SUnit *>> TempEdges;
+    std::list<std::pair<SUnit *, SUnit *>> TempEdges;
     TempCost = addEdges(SyncPipeline, CurrSU.first, CandSGID, TempEdges);
     LLVM_DEBUG(dbgs() << "Cost of Group " << TempCost << "\n");
 
@@ -756,12 +756,12 @@ void PipelineSolver::greedyFind(
 
   if (BestGroupID != -1) {
     BestGroup->add(*CurrSU.first);
-
-    for (auto &E : BestEdges) {
-      AddedEdges.push_back(E);
-      if (!tryAddEdge(DAG, E.first, E.second))
-        llvm_unreachable("Edges known to be insertable.");
-    }
+    AddedEdges.splice(AddedEdges.end(), BestEdges);
+    std::for_each(BestEdges.begin(), BestEdges.end(),
+                  [this](std::pair<SUnit *, SUnit *> E) {
+                    if (!tryAddEdge(DAG, E.first, E.second))
+                      llvm_unreachable("Edges known to be insertable.");
+                  });
 
     LLVM_DEBUG(dbgs() << "Best Group has ID: " << BestGroupID << " and Mask"
                       << (int)BestGroup->getMask() << "\n");
@@ -774,7 +774,7 @@ void PipelineSolver::greedyFind(
 
 bool PipelineSolver::solveGreedy() {
   BestCost = 0;
-  std::vector<std::pair<SUnit *, SUnit *>> AddedEdges;
+  std::list<std::pair<SUnit *, SUnit *>> AddedEdges;
 
   while (static_cast<size_t>(CurrSyncGroupIdx) < PipelineInstrs.size()) {
     SUToCandSGsPair CurrSU = PipelineInstrs[CurrSyncGroupIdx][CurrConflInstNo];
@@ -2520,7 +2520,7 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
 }
 
 int SchedGroup::link(SUnit &SU, bool MakePred,
-                     std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges) {
+                     std::list<std::pair<SUnit *, SUnit *>> &AddedEdges) {
   int MissedEdges = 0;
   for (auto *A : Collection) {
     SUnit *B = &SU;