[llvm] [AMDGPU]: Fall back to default mutations when iglp is not applied (PR #93418)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Tue May 28 17:35:11 PDT 2024
https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/93418
>From 39955131fbfb98f59f17c51001d9fb31f02b392a Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Sun, 26 May 2024 09:27:48 -0700
Subject: [PATCH 1/2] [AMDGPU]: Fall back to default mutations when iglp is not
applied
Change-Id: I2e1f4f4610275d3d629c2b34ced331d78ea0ca06
---
llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp | 21 +++++++++++++++----
llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h | 5 +++--
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 8 ++++---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 14 +++++++------
.../CodeGen/AMDGPU/cluster-flat-loads.mir | 20 ++++++++++++++++++
5 files changed, 53 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index 57769fe998d1f..e3f7248507953 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -2337,6 +2337,8 @@ class IGroupLPDAGMutation : public ScheduleDAGMutation {
ScheduleDAGMI *DAG;
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> *SavedMutations;
+
// Organize lists of SchedGroups by their SyncID. SchedGroups /
// SCHED_GROUP_BARRIERs with different SyncIDs will have no edges added
// between them.
@@ -2379,7 +2381,10 @@ class IGroupLPDAGMutation : public ScheduleDAGMutation {
AMDGPU::SchedulingPhase Phase = AMDGPU::SchedulingPhase::Initial;
IGroupLPDAGMutation() = default;
- IGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase) : Phase(Phase) {}
+ IGroupLPDAGMutation(
+ AMDGPU::SchedulingPhase Phase,
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> *SavedMutations)
+ : SavedMutations(SavedMutations), Phase(Phase) {}
};
unsigned SchedGroup::NumSchedGroups = 0;
@@ -2597,6 +2602,13 @@ void IGroupLPDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
PS.solve();
return;
}
+
+ if (!SavedMutations)
+ return;
+
+ // No iglp mutation was applied; fall back to the saved default mutations.
+ for (auto &m : *SavedMutations)
+ m->apply(DAG);
}
void IGroupLPDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) {
@@ -2695,9 +2707,10 @@ namespace llvm {
/// same scheduling region (e.g. pre and post-RA scheduling / multiple
/// scheduling "phases"), we can reenter this mutation framework more than once
/// for a given region.
-std::unique_ptr<ScheduleDAGMutation>
-createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase) {
- return std::make_unique<IGroupLPDAGMutation>(Phase);
+std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation(
+ AMDGPU::SchedulingPhase Phase,
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> *SavedMutations) {
+ return std::make_unique<IGroupLPDAGMutation>(Phase, SavedMutations);
}
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h
index aff7096f26d67..46ef4d702d002 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h
@@ -20,8 +20,9 @@ namespace AMDGPU {
enum class SchedulingPhase { Initial, PreRAReentry, PostRA };
} // namespace AMDGPU
-std::unique_ptr<ScheduleDAGMutation>
-createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase);
+std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation(
+ AMDGPU::SchedulingPhase Phase,
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> *SavedMutations);
} // namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 20329dea60275..b9a0cfc5b130d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -471,7 +471,8 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
if (ST.shouldClusterStores())
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
- DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
+ DAG->addMutation(
+ createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial, nullptr));
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
return DAG;
@@ -481,7 +482,8 @@ static ScheduleDAGInstrs *
createGCNMaxILPMachineScheduler(MachineSchedContext *C) {
ScheduleDAGMILive *DAG =
new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxILPSchedStrategy>(C));
- DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
+ DAG->addMutation(
+ createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial, nullptr));
return DAG;
}
@@ -893,7 +895,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));
DAG->addMutation(
- createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
+ createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA, nullptr));
if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less))
DAG->addMutation(createVOPDPairingMutation());
return DAG;
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 94d93390d0916..3882ab4cf58bf 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -713,8 +713,8 @@ bool UnclusteredHighRPStage::initGCNSchedStage() {
return false;
SavedMutations.swap(DAG.Mutations);
- DAG.addMutation(
- createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PreRAReentry));
+ DAG.addMutation(createIGroupLPDAGMutation(
+ AMDGPU::SchedulingPhase::PreRAReentry, nullptr));
InitialOccupancy = DAG.MinOccupancy;
// Aggressivly try to reduce register pressure in the unclustered high RP
@@ -858,7 +858,8 @@ bool GCNSchedStage::initGCNRegion() {
StageID == GCNSchedStageID::ILPInitialSchedule;
DAG.addMutation(createIGroupLPDAGMutation(
IsInitialStage ? AMDGPU::SchedulingPhase::Initial
- : AMDGPU::SchedulingPhase::PreRAReentry));
+ : AMDGPU::SchedulingPhase::PreRAReentry,
+ &SavedMutations));
}
return true;
@@ -1577,15 +1578,16 @@ void GCNPostScheduleDAGMILive::schedule() {
if (HasIGLPInstrs) {
SavedMutations.clear();
SavedMutations.swap(Mutations);
- addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
+ addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA,
+ &SavedMutations));
}
ScheduleDAGMI::schedule();
-}
-void GCNPostScheduleDAGMILive::finalizeSchedule() {
if (HasIGLPInstrs)
SavedMutations.swap(Mutations);
+}
+void GCNPostScheduleDAGMILive::finalizeSchedule() {
ScheduleDAGMI::finalizeSchedule();
}
diff --git a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir
index 0d84dc0bdc53e..287e58dfc536e 100644
--- a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir
+++ b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir
@@ -18,3 +18,23 @@ body: |
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $mode, implicit $exec
%3 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32))
...
+---
+# GCN-LABEL: name: cluster_flat_loads_iglp_opt
+# GCN: FLAT_LOAD_DWORD %0, 0
+# GCN-NEXT: FLAT_LOAD_DWORD %0, 4
+# GCN-NEXT: V_ADD_F32_e64
+name: cluster_flat_loads_iglp_opt
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vgpr_32 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
+ %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $mode, implicit $exec
+ %3 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32))
+ IGLP_OPT 2
+...
>From 21568e8b00afa41804a9d30e08737e579169014a Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 28 May 2024 17:33:59 -0700
Subject: [PATCH 2/2] Use inline register class
Change-Id: I55fe6465aed5b6d78f7e3f56db3b3062a08d429b
---
.../CodeGen/AMDGPU/cluster-flat-loads.mir | 26 ++++++-------------
1 file changed, 8 insertions(+), 18 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir
index 287e58dfc536e..ae614beb8d3a1 100644
--- a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir
+++ b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir
@@ -6,17 +6,12 @@
# GCN-NEXT: V_ADD_F32_e64
name: cluster_flat_loads
tracksRegLiveness: true
-registers:
- - { id: 0, class: vreg_64 }
- - { id: 1, class: vgpr_32 }
- - { id: 2, class: vgpr_32 }
- - { id: 3, class: vgpr_32 }
body: |
bb.0:
- %0 = IMPLICIT_DEF
- %1 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
- %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $mode, implicit $exec
- %3 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32))
+ %0:vreg_64 = IMPLICIT_DEF
+ %1:vgpr_32 = FLAT_LOAD_DWORD %0:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
+ %2:vgpr_32 = V_ADD_F32_e64 0, killed %1:vgpr_32, 0, 1, 0, 0, implicit $mode, implicit $exec
+ %3:vgpr_32 = FLAT_LOAD_DWORD %0:vreg_64, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32))
...
---
# GCN-LABEL: name: cluster_flat_loads_iglp_opt
@@ -25,16 +20,11 @@ body: |
# GCN-NEXT: V_ADD_F32_e64
name: cluster_flat_loads_iglp_opt
tracksRegLiveness: true
-registers:
- - { id: 0, class: vreg_64 }
- - { id: 1, class: vgpr_32 }
- - { id: 2, class: vgpr_32 }
- - { id: 3, class: vgpr_32 }
body: |
bb.0:
- %0 = IMPLICIT_DEF
- %1 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
- %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $mode, implicit $exec
- %3 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32))
+ %0:vreg_64 = IMPLICIT_DEF
+ %1:vgpr_32 = FLAT_LOAD_DWORD %0:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
+ %2:vgpr_32 = V_ADD_F32_e64 0, killed %1:vgpr_32, 0, 1, 0, 0, implicit $mode, implicit $exec
+ %3:vgpr_32 = FLAT_LOAD_DWORD %0:vreg_64, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32))
IGLP_OPT 2
...
More information about the llvm-commits
mailing list