[llvm] [AMDGPU] IGroupLP: Refactor SchedGroup::initSchedGroup (NFC) (PR #184122)
Frederik Harwath via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 2 05:56:25 PST 2026
https://github.com/frederik-h updated https://github.com/llvm/llvm-project/pull/184122
>From d5df9e6a0c55d48707cdf3bf91274bfb9cd4eecb Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath at amd.com>
Date: Mon, 2 Mar 2026 05:17:34 -0500
Subject: [PATCH 1/6] [AMDGPU] IGroupLP: Remove useless SchedGroup::IsFull call
(NFC)
---
llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index 10ffbe281beac..3ef224752bc38 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -2573,10 +2573,8 @@ bool SchedGroup::canAddSU(SUnit &SU) const {
}
void SchedGroup::initSchedGroup() {
+ assert (!MaxSize && "Only called for SCHED_BARRIERs which don't have a size." );
for (auto &SU : DAG->SUnits) {
- if (isFull())
- break;
-
if (canAddSU(SU))
add(SU);
}
@@ -2584,18 +2582,22 @@ void SchedGroup::initSchedGroup() {
void SchedGroup::initSchedGroup(std::vector<SUnit>::reverse_iterator RIter,
SUnitsToCandidateSGsMap &SyncedInstrs) {
+ // This function is only used for SCHED_GROUP_BARRIER which always
+ // have a size.
+ assert(MaxSize);
+ assert(Collection.empty());
+
SUnit &InitSU = *RIter;
for (auto E = DAG->SUnits.rend(); RIter != E; ++RIter) {
auto &SU = *RIter;
- if (isFull())
- break;
if (canAddSU(SU))
SyncedInstrs[&SU].push_back(SGID);
}
+ // TODO Do not create SchedGroups with MaxSize 0?
+ assert((MaxSize == 0 || !isFull()) && "Nothing added to Collection");
add(InitSU);
- assert(MaxSize);
(*MaxSize)++;
}
@@ -2604,8 +2606,7 @@ void SchedGroup::initSchedGroup(SUnitsToCandidateSGsMap &SyncedInstrs) {
auto E = DAG->SUnits.rend();
for (; I != E; ++I) {
auto &SU = *I;
- if (isFull())
- break;
+
if (canAddSU(SU))
SyncedInstrs[&SU].push_back(SGID);
}
>From 686053b15da0de5d74165502b7dcad4f987ed35b Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath at amd.com>
Date: Mon, 2 Mar 2026 05:42:05 -0500
Subject: [PATCH 2/6] Remove outdated comment
The comment used to refer to a call to resetEdges which has been removed.
---
llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp | 4 ----
1 file changed, 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index 3ef224752bc38..67afb0d6c527b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -2655,8 +2655,6 @@ void IGroupLPDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
void IGroupLPDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) {
MachineInstr &MI = *SchedBarrier.getInstr();
assert(MI.getOpcode() == AMDGPU::SCHED_BARRIER);
- // Remove all existing edges from the SCHED_BARRIER that were added due to the
- // instruction having side effects.
LLVM_DEBUG(dbgs() << "Building SchedGroup for SchedBarrier with Mask: "
<< MI.getOperand(0).getImm() << "\n");
auto InvertedMask =
@@ -2712,8 +2710,6 @@ IGroupLPDAGMutation::invertSchedBarrierMask(SchedGroupMask Mask) const {
void IGroupLPDAGMutation::initSchedGroupBarrierPipelineStage(
std::vector<SUnit>::reverse_iterator RIter) {
- // Remove all existing edges from the SCHED_GROUP_BARRIER that were added due
- // to the instruction having side effects.
MachineInstr &SGB = *RIter->getInstr();
assert(SGB.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER);
int32_t SGMask = SGB.getOperand(0).getImm();
>From 9d7166fad416fbaffd884cbf037713b454f7a51f Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath at amd.com>
Date: Mon, 2 Mar 2026 06:13:44 -0500
Subject: [PATCH 3/6] [AMDGPU] IGroupLP: Clean up SchedGroup::initSchedGroup
(NFC)
The function has different overloads, two of which are only used for
very specific types of SchedGroups, namely SCHED_BARRIER and
SCHED_GROUP_BARRIER. This seems to have led to some confusion,
since the different functions perform checks which are not needed
for their intended restricted use cases. Furthermore, the doc comments
are wrong.
Rename the functions, remove the useless checks, and fix the comments.
---
llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp | 47 +++++++++++------------
1 file changed, 23 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index 67afb0d6c527b..bf69c6d16fc5f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -209,16 +209,15 @@ class SchedGroup {
// Remove last element in the SchedGroup
void pop() { Collection.pop_back(); }
- // Identify and add all relevant SUs from the DAG to this SchedGroup.
- void initSchedGroup();
+ // Identify and add all relevant SUs from the DAG to this SchedGroup
+ // which represents a SCHED_BARRIER.
+ void initSchedBarrier();
- // Add instructions to the SchedGroup bottom up starting from RIter.
- // PipelineInstrs is a set of instructions that should not be added to the
- // SchedGroup even when the other conditions for adding it are satisfied.
- // RIter will be added to the SchedGroup as well, and dependencies will be
- // added so that RIter will always be scheduled at the end of the group.
- void initSchedGroup(std::vector<SUnit>::reverse_iterator RIter,
- SUnitsToCandidateSGsMap &SyncedInstrs);
+ // Add the SCHED_GROUP_BARRIER instruction to the SchedGroup and for
+ // each SU starting at RIter, add the SchedGroup's SGID to the
+ // collection of potential SchedGroups for SU in SyncedInstrs.
+ void initSchedGroupBarrier(std::vector<SUnit>::reverse_iterator RIter,
+ SUnitsToCandidateSGsMap &SyncedInstrs);
void initSchedGroup(SUnitsToCandidateSGsMap &SyncedInstrs);
@@ -2572,33 +2571,33 @@ bool SchedGroup::canAddSU(SUnit &SU) const {
return std::all_of(B, E, [this](MachineInstr &MI) { return canAddMI(MI); });
}
-void SchedGroup::initSchedGroup() {
- assert (!MaxSize && "Only called for SCHED_BARRIERs which don't have a size." );
+void SchedGroup::initSchedBarrier() {
+ assert(!MaxSize && "SCHED_BARRIERs don't have a size.");
+ assert(Collection.empty());
+
for (auto &SU : DAG->SUnits) {
if (canAddSU(SU))
add(SU);
}
}
-void SchedGroup::initSchedGroup(std::vector<SUnit>::reverse_iterator RIter,
- SUnitsToCandidateSGsMap &SyncedInstrs) {
- // This function is only used for SCHED_GROUP_BARRIER which always
- // have a size.
- assert(MaxSize);
+void SchedGroup::initSchedGroupBarrier(
+ std::vector<SUnit>::reverse_iterator RIter,
+ SUnitsToCandidateSGsMap &SyncedInstrs) {
+ assert(MaxSize && "SCHED_GROUP_BARRIER always has size.");
assert(Collection.empty());
- SUnit &InitSU = *RIter;
+ // TODO Do not create SchedGroups with MaxSize 0?
+ assert((MaxSize == 0 || !isFull()) && "Nothing added to Collection");
+ add(*RIter);
+ (*MaxSize)++;
+
for (auto E = DAG->SUnits.rend(); RIter != E; ++RIter) {
auto &SU = *RIter;
if (canAddSU(SU))
SyncedInstrs[&SU].push_back(SGID);
}
-
- // TODO Do not create SchedGroups with MaxSize 0?
- assert((MaxSize == 0 || !isFull()) && "Nothing added to Collection");
- add(InitSU);
- (*MaxSize)++;
}
void SchedGroup::initSchedGroup(SUnitsToCandidateSGsMap &SyncedInstrs) {
@@ -2660,7 +2659,7 @@ void IGroupLPDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) {
auto InvertedMask =
invertSchedBarrierMask((SchedGroupMask)MI.getOperand(0).getImm());
SchedGroup SG(InvertedMask, std::nullopt, DAG, TII);
- SG.initSchedGroup();
+ SG.initSchedBarrier();
// Preserve original instruction ordering relative to the SCHED_BARRIER.
SG.link(
@@ -2719,7 +2718,7 @@ void IGroupLPDAGMutation::initSchedGroupBarrierPipelineStage(
auto &SG = SyncedSchedGroups[SyncID].emplace_back((SchedGroupMask)SGMask,
Size, SyncID, DAG, TII);
- SG.initSchedGroup(RIter, SyncedInstrs[SG.getSyncID()]);
+ SG.initSchedGroupBarrier(RIter, SyncedInstrs[SG.getSyncID()]);
}
bool IGroupLPDAGMutation::initIGLPOpt(SUnit &SU) {
>From 8a5adb20c168a6b21b67755717560138fb226b88 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath at amd.com>
Date: Mon, 2 Mar 2026 06:50:41 -0500
Subject: [PATCH 4/6] [AMDGPU] IGroupLP: Extract function for assigning SUnits
to SchedGroup
---
llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp | 25 ++++++++++++-----------
1 file changed, 13 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index bf69c6d16fc5f..d83cabd3ba38c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -209,6 +209,10 @@ class SchedGroup {
// Remove last element in the SchedGroup
void pop() { Collection.pop_back(); }
+ template <class T>
+ void findCandidateSUnits(T Begin, T End,
+ SUnitsToCandidateSGsMap &SyncedInstrs);
+
// Identify and add all relevant SUs from the DAG to this SchedGroup
// which represents a SCHED_BARRIER.
void initSchedBarrier();
@@ -2591,24 +2595,21 @@ void SchedGroup::initSchedGroupBarrier(
assert((MaxSize == 0 || !isFull()) && "Nothing added to Collection");
add(*RIter);
(*MaxSize)++;
-
- for (auto E = DAG->SUnits.rend(); RIter != E; ++RIter) {
- auto &SU = *RIter;
+ findCandidateSUnits(RIter, DAG->SUnits.rend(), SyncedInstrs);
+}
+
+template <class T>
+void SchedGroup::findCandidateSUnits(T Begin, T End,
+ SUnitsToCandidateSGsMap &SyncedInstrs) {
+ std::for_each(Begin, End, [this, &SyncedInstrs](SUnit &SU) {
if (canAddSU(SU))
SyncedInstrs[&SU].push_back(SGID);
- }
+ });
}
void SchedGroup::initSchedGroup(SUnitsToCandidateSGsMap &SyncedInstrs) {
- auto I = DAG->SUnits.rbegin();
- auto E = DAG->SUnits.rend();
- for (; I != E; ++I) {
- auto &SU = *I;
-
- if (canAddSU(SU))
- SyncedInstrs[&SU].push_back(SGID);
- }
+ findCandidateSUnits(DAG->SUnits.rbegin(), DAG->SUnits.rend(), SyncedInstrs);
}
void IGroupLPDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
>From 53848d5b75a3bbac69a8545ca8f168da638fbbee Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath at amd.com>
Date: Mon, 2 Mar 2026 08:11:47 -0500
Subject: [PATCH 5/6] [AMDGPU] IGroupLP: Remove/rename initSchedGroup variants
The variants for SCHED_BARRIER and SCHED_GROUP_BARRIER became so
simple after the last refactoring steps that it seems appropriate to
inline them at their only call sites. The remaining initSchedGroup
function does not modify the SchedGroup itself; it only updates the
SyncedInstrs map. Renaming it to "findCandidateSUnits" seems better
since that is the only thing it does.
---
llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp | 139 +++++++++-------------
1 file changed, 56 insertions(+), 83 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index d83cabd3ba38c..f92ada7e94e40 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -213,17 +213,10 @@ class SchedGroup {
void findCandidateSUnits(T Begin, T End,
SUnitsToCandidateSGsMap &SyncedInstrs);
- // Identify and add all relevant SUs from the DAG to this SchedGroup
- // which represents a SCHED_BARRIER.
- void initSchedBarrier();
-
- // Add the SCHED_GROUP_BARRIER instruction to the SchedGroup and for
- // each SU starting at RIter, add the SchedGroup's SGID to the
- // collection of potential SchedGroups for SU in SyncedInstrs.
- void initSchedGroupBarrier(std::vector<SUnit>::reverse_iterator RIter,
- SUnitsToCandidateSGsMap &SyncedInstrs);
-
- void initSchedGroup(SUnitsToCandidateSGsMap &SyncedInstrs);
+ /// Find each SUnit in the DAG that could potentially be added to
+ /// this SchedGroup and add the SGID to the candidate SchedGroups
+ /// for SU in \p SyncedInstrs.
+ void findCandidateSUnits(SUnitsToCandidateSGsMap &SyncedInstrs);
int getSyncID() { return SyncID; }
@@ -879,11 +872,11 @@ bool MFMASmallGemmOpt::applyIGLPStrategy(
for (unsigned I = 0; I < MFMACount * 3; ++I) {
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::DS, 2, PipelineSyncID, DAG, TII);
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
}
return true;
@@ -1600,7 +1593,7 @@ bool MFMAExpInterleaveOpt::applyIGLPStrategy(
SG->addRule(
std::make_shared<EnablesNthMFMA>(1, TII, SG->getSGID(), true));
SG->addRule(std::make_shared<IsFMA>(TII, SG->getSGID()));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
// Second Round FMA
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
@@ -1613,7 +1606,7 @@ bool MFMAExpInterleaveOpt::applyIGLPStrategy(
SG->addRule(std::make_shared<EnablesNthMFMA>(MFMAEnablement + 1, TII,
SG->getSGID(), true));
SG->addRule(std::make_shared<IsFMA>(TII, SG->getSGID()));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
}
if (UsesDSRead) {
@@ -1621,7 +1614,7 @@ bool MFMAExpInterleaveOpt::applyIGLPStrategy(
SchedGroupMask::DS_READ, 2, PipelineSyncID, DAG, TII);
SG->addRule(std::make_shared<OccursAtOrAfterNode>(*FirstPipeDSR, TII,
SG->getSGID()));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
}
// First Round EXP
@@ -1635,7 +1628,7 @@ bool MFMAExpInterleaveOpt::applyIGLPStrategy(
SG->addRule(std::make_shared<IsPipeExp>(TII, SG->getSGID(), true));
SG->addRule(std::make_shared<LessThanNSuccs>(8, TII, SG->getSGID(),
HasChainBetweenCvt));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
incrementTransPosition();
@@ -1652,7 +1645,7 @@ bool MFMAExpInterleaveOpt::applyIGLPStrategy(
else
SG->addRule(std::make_shared<IsSuccOfPrevNthGroup>(
1 + (2 + UsesFMA) * I, TII, SG->getSGID()));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
}
// Third Round FMA
@@ -1667,7 +1660,7 @@ bool MFMAExpInterleaveOpt::applyIGLPStrategy(
SG->addRule(std::make_shared<EnablesNthMFMA>(2 * MFMAEnablement + 1,
TII, SG->getSGID(), true));
SG->addRule(std::make_shared<IsFMA>(TII, SG->getSGID()));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
}
// Second Round EXP
@@ -1683,7 +1676,7 @@ bool MFMAExpInterleaveOpt::applyIGLPStrategy(
SG->addRule(std::make_shared<IsPipeExp>(TII, SG->getSGID(), true));
SG->addRule(std::make_shared<LessThanNSuccs>(8, TII, SG->getSGID(),
HasChainBetweenCvt));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
}
// The "extra" EXP which enables all MFMA
@@ -1693,7 +1686,7 @@ bool MFMAExpInterleaveOpt::applyIGLPStrategy(
SG->addRule(std::make_shared<IsPipeExp>(TII, SG->getSGID(), true));
SG->addRule(std::make_shared<GreaterThanOrEqualToNSuccs>(
8, TII, SG->getSGID(), HasChainBetweenCvt));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
// PHASE 2: Main Interleave Loop
@@ -1730,14 +1723,14 @@ bool MFMAExpInterleaveOpt::applyIGLPStrategy(
SG->getSGID(), true));
else
SG->addRule(std::make_shared<OccursAfterExp>(TII, SG->getSGID(), true));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
incrementMFMAPosition();
if (UsesVALU) {
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::VALU, VALUOps, PipelineSyncID, DAG, TII);
SG->addRule(std::make_shared<IsPipeAdd>(TII, SG->getSGID()));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
}
if (UsesDSRead && !(I % 4)) {
@@ -1745,7 +1738,7 @@ bool MFMAExpInterleaveOpt::applyIGLPStrategy(
SchedGroupMask::DS_READ, 2, PipelineSyncID, DAG, TII);
SG->addRule(std::make_shared<OccursAtOrAfterNode>(*FirstPipeDSR, TII,
SG->getSGID()));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
}
// CVT, EXP, FMA Interleaving
@@ -1773,7 +1766,7 @@ bool MFMAExpInterleaveOpt::applyIGLPStrategy(
else
SG->addRule(std::make_shared<IsSuccOfPrevNthGroup>(CurrentOffset, TII,
SG->getSGID()));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
}
// Round N + 3 FMA
@@ -1790,7 +1783,7 @@ bool MFMAExpInterleaveOpt::applyIGLPStrategy(
(((I * ExpRatio + J) / ExpRequirement) + 3) * MFMAEnablement + 1,
TII, SG->getSGID(), true));
SG->addRule(std::make_shared<IsFMA>(TII, SG->getSGID()));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
}
// Round N + 2 Exp
@@ -1807,7 +1800,7 @@ bool MFMAExpInterleaveOpt::applyIGLPStrategy(
SG->addRule(std::make_shared<IsPipeExp>(TII, SG->getSGID(), true));
SG->addRule(std::make_shared<LessThanNSuccs>(8, TII, SG->getSGID(),
HasChainBetweenCvt));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
}
}
@@ -1815,7 +1808,7 @@ bool MFMAExpInterleaveOpt::applyIGLPStrategy(
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::MFMA, MFMAEnablement * 2, PipelineSyncID, DAG, TII);
SG->addRule(std::make_shared<OccursAfterExp>(TII, SG->getSGID(), true));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
return true;
}
@@ -1851,11 +1844,11 @@ bool MFMAExpSimpleInterleaveOpt::applyIGLPStrategy(
for (unsigned I = 0; I < MFMACount * 3; ++I) {
SchedGroup *SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::TRANS, 1, PipelineSyncID, DAG, TII);
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
}
return true;
@@ -2164,11 +2157,11 @@ bool MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
for (unsigned I = 0; I < MFMACount; I++) {
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::VALU, 2, PipelineSyncID, DAG, TII);
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
}
}
@@ -2181,21 +2174,21 @@ bool MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::DS_READ, 4, PipelineSyncID, DAG, TII);
SG->addRule(std::make_shared<EnablesInitialMFMA>(TII, SG->getSGID(), true));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
// Interleave MFMA with DS_READ prefetch
for (unsigned I = 4; I < DSRCount; ++I) {
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::DS_READ, 1, PipelineSyncID, DAG, TII);
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
}
// Phase 2a: Loop carried dependency with V_PERM
@@ -2205,34 +2198,34 @@ bool MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::VALU, 4, PipelineSyncID, DAG, TII);
SG->addRule(std::make_shared<IsPermForDSW>(TII, SG->getSGID(), true));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::DS_WRITE, 1, PipelineSyncID, DAG, TII);
SG->addRule(std::make_shared<IsSuccOfPrevGroup>(TII, SG->getSGID()));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::VMEM_READ, 4, PipelineSyncID, DAG, TII);
SG->addRule(std::make_shared<SharesPredWithPrevNthGroup>(
1, TII, SG->getSGID(), true));
SG->addRule(std::make_shared<VMEMSize>(TII, SG->getSGID()));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::VMEM_READ, 4, PipelineSyncID, DAG, TII);
SG->addRule(std::make_shared<SharesPredWithPrevNthGroup>(
3, TII, SG->getSGID(), true));
SG->addRule(std::make_shared<VMEMSize>(TII, SG->getSGID()));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
}
// Phase 2b: Loop carried dependency without V_PERM
@@ -2241,16 +2234,16 @@ bool MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
for (unsigned I = DSWWithPermCount; I < DSWCount; I++) {
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::DS_WRITE, 1, PipelineSyncID, DAG, TII);
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::VMEM_READ, 4, PipelineSyncID, DAG, TII);
SG->addRule(std::make_shared<VMEMSize>(TII, SG->getSGID()));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
}
// Phase 2c: Loop carried dependency with V_PERM, VMEM_READs are
@@ -2262,52 +2255,52 @@ bool MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::VALU, 4, PipelineSyncID, DAG, TII);
SG->addRule(std::make_shared<IsPermForDSW>(TII, SG->getSGID(), true));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::DS_WRITE, 1, PipelineSyncID, DAG, TII);
SG->addRule(std::make_shared<IsSuccOfPrevGroup>(TII, SG->getSGID()));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::VALU, 4, PipelineSyncID, DAG, TII);
SG->addRule(std::make_shared<IsPermForDSW>(TII, SG->getSGID(), true));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::DS_WRITE, 1, PipelineSyncID, DAG, TII);
SG->addRule(std::make_shared<IsSuccOfPrevGroup>(TII, SG->getSGID()));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::VMEM_READ, 4, PipelineSyncID, DAG, TII);
SG->addRule(std::make_shared<SharesPredWithPrevNthGroup>(
2, TII, SG->getSGID(), true));
SG->addRule(std::make_shared<VMEMSize>(TII, SG->getSGID()));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::VMEM_READ, 4, PipelineSyncID, DAG, TII);
SG->addRule(std::make_shared<SharesPredWithPrevNthGroup>(
4, TII, SG->getSGID(), true));
SG->addRule(std::make_shared<VMEMSize>(TII, SG->getSGID()));
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
- SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ SG->findCandidateSUnits(SyncedInstrs[SG->getSyncID()]);
}
return true;
@@ -2575,30 +2568,6 @@ bool SchedGroup::canAddSU(SUnit &SU) const {
return std::all_of(B, E, [this](MachineInstr &MI) { return canAddMI(MI); });
}
-void SchedGroup::initSchedBarrier() {
- assert(!MaxSize && "SCHED_BARRIERs don't have a size.");
- assert(Collection.empty());
-
- for (auto &SU : DAG->SUnits) {
- if (canAddSU(SU))
- add(SU);
- }
-}
-
-void SchedGroup::initSchedGroupBarrier(
- std::vector<SUnit>::reverse_iterator RIter,
- SUnitsToCandidateSGsMap &SyncedInstrs) {
- assert(MaxSize && "SCHED_GROUP_BARRIER always has size.");
- assert(Collection.empty());
-
- // TODO Do not create SchedGroups with MaxSize 0?
- assert((MaxSize == 0 || !isFull()) && "Nothing added to Collection");
- add(*RIter);
- (*MaxSize)++;
-
- findCandidateSUnits(RIter, DAG->SUnits.rend(), SyncedInstrs);
-}
-
template <class T>
void SchedGroup::findCandidateSUnits(T Begin, T End,
SUnitsToCandidateSGsMap &SyncedInstrs) {
@@ -2608,7 +2577,7 @@ void SchedGroup::findCandidateSUnits(T Begin, T End,
});
}
-void SchedGroup::initSchedGroup(SUnitsToCandidateSGsMap &SyncedInstrs) {
+void SchedGroup::findCandidateSUnits(SUnitsToCandidateSGsMap &SyncedInstrs) {
findCandidateSUnits(DAG->SUnits.rbegin(), DAG->SUnits.rend(), SyncedInstrs);
}
@@ -2660,7 +2629,10 @@ void IGroupLPDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) {
auto InvertedMask =
invertSchedBarrierMask((SchedGroupMask)MI.getOperand(0).getImm());
SchedGroup SG(InvertedMask, std::nullopt, DAG, TII);
- SG.initSchedBarrier();
+
+ for (SUnit &SU : DAG->SUnits)
+ if (SG.canAddSU(SU))
+ SG.add(SU);
// Preserve original instruction ordering relative to the SCHED_BARRIER.
SG.link(
@@ -2716,10 +2688,11 @@ void IGroupLPDAGMutation::initSchedGroupBarrierPipelineStage(
int32_t Size = SGB.getOperand(1).getImm();
int32_t SyncID = SGB.getOperand(2).getImm();
+ Size++; // Make room for the SCHED_GROUP_BARRIER instruction
auto &SG = SyncedSchedGroups[SyncID].emplace_back((SchedGroupMask)SGMask,
Size, SyncID, DAG, TII);
-
- SG.initSchedGroupBarrier(RIter, SyncedInstrs[SG.getSyncID()]);
+ SG.add(*RIter);
+ SG.findCandidateSUnits(RIter, SG.DAG->SUnits.rend(), SyncedInstrs[SG.getSyncID()]);
}
bool IGroupLPDAGMutation::initIGLPOpt(SUnit &SU) {
>From 8cb0a463e9dc7e3659d91500e79479befd4d13bf Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath at amd.com>
Date: Mon, 2 Mar 2026 08:56:11 -0500
Subject: [PATCH 6/6] clang-format changes
---
llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index f92ada7e94e40..63bcb5c62a77b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -2692,7 +2692,8 @@ void IGroupLPDAGMutation::initSchedGroupBarrierPipelineStage(
auto &SG = SyncedSchedGroups[SyncID].emplace_back((SchedGroupMask)SGMask,
Size, SyncID, DAG, TII);
SG.add(*RIter);
- SG.findCandidateSUnits(RIter, SG.DAG->SUnits.rend(), SyncedInstrs[SG.getSyncID()]);
+ SG.findCandidateSUnits(RIter, SG.DAG->SUnits.rend(),
+ SyncedInstrs[SG.getSyncID()]);
}
bool IGroupLPDAGMutation::initIGLPOpt(SUnit &SU) {
More information about the llvm-commits
mailing list