[llvm] 93d35ad - AMDGPU: Delete FillMFMAShadowMutation (#123861)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 22 07:41:30 PST 2025
Author: Matt Arsenault
Date: 2025-01-22T22:41:25+07:00
New Revision: 93d35ad5f53a69606eac0dabb8ceef066ce4d407
URL: https://github.com/llvm/llvm-project/commit/93d35ad5f53a69606eac0dabb8ceef066ce4d407
DIFF: https://github.com/llvm/llvm-project/commit/93d35ad5f53a69606eac0dabb8ceef066ce4d407.diff
LOG: AMDGPU: Delete FillMFMAShadowMutation (#123861)
No test changes with this removed and it appears to
be obsolete.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
llvm/lib/Target/AMDGPU/GCNSubtarget.h
Removed:
llvm/test/CodeGen/AMDGPU/power-sched-no-cycle.mir
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index ce0b10b804ba1a..cb662258b26672 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1075,7 +1075,6 @@ class GCNPassConfig final : public AMDGPUPassConfig {
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
if (ST.shouldClusterStores())
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
- DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));
DAG->addMutation(
createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less))
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
index 117afc4a8e8c60..413c2884c034ea 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -38,11 +38,6 @@ using namespace llvm;
#include "AMDGPUGenSubtargetInfo.inc"
#undef AMDGPUSubtarget
-static cl::opt<bool>
- EnablePowerSched("amdgpu-enable-power-sched",
- cl::desc("Enable scheduling to minimize mAI power bursts"),
- cl::init(false));
-
static cl::opt<bool> EnableVGPRIndexMode(
"amdgpu-vgpr-index-mode",
cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
@@ -586,117 +581,6 @@ void GCNSubtarget::adjustSchedDependency(
}
}
-namespace {
-struct FillMFMAShadowMutation : ScheduleDAGMutation {
- const SIInstrInfo *TII;
-
- ScheduleDAGMI *DAG;
-
- FillMFMAShadowMutation(const SIInstrInfo *tii) : TII(tii) {}
-
- bool isSALU(const SUnit *SU) const {
- const MachineInstr *MI = SU->getInstr();
- return MI && TII->isSALU(*MI) && !MI->isTerminator();
- }
-
- bool isVALU(const SUnit *SU) const {
- const MachineInstr *MI = SU->getInstr();
- return MI && TII->isVALU(*MI);
- }
-
- // Link as many SALU instructions in chain as possible. Return the size
- // of the chain. Links up to MaxChain instructions.
- unsigned linkSALUChain(SUnit *From, SUnit *To, unsigned MaxChain,
- SmallPtrSetImpl<SUnit *> &Visited) const {
- SmallVector<SUnit *, 8> Worklist({To});
- unsigned Linked = 0;
-
- while (!Worklist.empty() && MaxChain-- > 0) {
- SUnit *SU = Worklist.pop_back_val();
- if (!Visited.insert(SU).second)
- continue;
-
- LLVM_DEBUG(dbgs() << "Inserting edge from\n"; DAG->dumpNode(*From);
- dbgs() << "to\n"; DAG->dumpNode(*SU); dbgs() << '\n');
-
- if (SU != From && From != &DAG->ExitSU && DAG->canAddEdge(SU, From))
- if (DAG->addEdge(SU, SDep(From, SDep::Artificial)))
- ++Linked;
-
- for (SDep &SI : From->Succs) {
- SUnit *SUv = SI.getSUnit();
- if (SUv != From && SU != &DAG->ExitSU && isVALU(SUv) &&
- DAG->canAddEdge(SUv, SU))
- DAG->addEdge(SUv, SDep(SU, SDep::Artificial));
- }
-
- for (SDep &SI : SU->Succs) {
- SUnit *Succ = SI.getSUnit();
- if (Succ != SU && isSALU(Succ))
- Worklist.push_back(Succ);
- }
- }
-
- return Linked;
- }
-
- void apply(ScheduleDAGInstrs *DAGInstrs) override {
- const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>();
- if (!ST.hasMAIInsts())
- return;
- DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);
- const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel();
- if (!TSchedModel || DAG->SUnits.empty())
- return;
-
- // Scan for MFMA long latency instructions and try to add a dependency
- // of available SALU instructions to give them a chance to fill MFMA
- // shadow. That is desirable to fill MFMA shadow with SALU instructions
- // rather than VALU to prevent power consumption bursts and throttle.
- auto LastSALU = DAG->SUnits.begin();
- auto E = DAG->SUnits.end();
- SmallPtrSet<SUnit *, 32> Visited;
- for (SUnit &SU : DAG->SUnits) {
- MachineInstr &MAI = *SU.getInstr();
- if (!TII->isMAI(MAI) ||
- MAI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
- MAI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64)
- continue;
-
- unsigned Lat = TSchedModel->computeInstrLatency(&MAI) - 1;
-
- LLVM_DEBUG(dbgs() << "Found MFMA: "; DAG->dumpNode(SU);
- dbgs() << "Need " << Lat
- << " instructions to cover latency.\n");
-
- // Find up to Lat independent scalar instructions as early as
- // possible such that they can be scheduled after this MFMA.
- for (; Lat && LastSALU != E; ++LastSALU) {
- if (Visited.count(&*LastSALU))
- continue;
-
- if (&SU == &DAG->ExitSU || &SU == &*LastSALU || !isSALU(&*LastSALU) ||
- !DAG->canAddEdge(&*LastSALU, &SU))
- continue;
-
- Lat -= linkSALUChain(&SU, &*LastSALU, Lat, Visited);
- }
- }
- }
-};
-} // namespace
-
-void GCNSubtarget::getPostRAMutations(
- std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
- Mutations.push_back(std::make_unique<FillMFMAShadowMutation>(&InstrInfo));
-}
-
-std::unique_ptr<ScheduleDAGMutation>
-GCNSubtarget::createFillMFMAShadowMutation(const TargetInstrInfo *TII) const {
- return EnablePowerSched ? std::make_unique<FillMFMAShadowMutation>(&InstrInfo)
- : nullptr;
-}
-
unsigned GCNSubtarget::getNSAThreshold(const MachineFunction &MF) const {
if (getGeneration() >= AMDGPUSubtarget::GFX12)
return 0; // Not MIMG encoding.
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 3388bc3c5a8de1..e0b0000f757faf 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1575,13 +1575,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// unit requirement.
unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
- void getPostRAMutations(
- std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
- const override;
-
- std::unique_ptr<ScheduleDAGMutation>
- createFillMFMAShadowMutation(const TargetInstrInfo *TII) const;
-
bool isWave32() const {
return getWavefrontSize() == 32;
}
diff --git a/llvm/test/CodeGen/AMDGPU/power-sched-no-cycle.mir b/llvm/test/CodeGen/AMDGPU/power-sched-no-cycle.mir
deleted file mode 100644
index 2c1880c88631ec..00000000000000
--- a/llvm/test/CodeGen/AMDGPU/power-sched-no-cycle.mir
+++ /dev/null
@@ -1,26 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=postmisched %s -o - -amdgpu-enable-power-sched=true 2>&1 | FileCheck %s
-# This test represents a pattern which caused power-sched to introduce cycles into the Scheduling graph. By virtue of this test not crashing indicates it has completed succesfully.
-
----
-name: power_sched_cycle_condition
-tracksRegLiveness: true
-body: |
- bb.0:
- liveins: $vgpr1, $sgpr2_sgpr3, $sgpr1, $vgpr72_vgpr73, $vgpr2_vgpr3, $sgpr22, $sgpr6_sgpr7, $sgpr10_sgpr11
- ; CHECK-LABEL: name: power_sched_cycle_condition
- ; CHECK: liveins: $vgpr1, $sgpr2_sgpr3, $sgpr1, $vgpr72_vgpr73, $vgpr2_vgpr3, $sgpr22, $sgpr6_sgpr7, $sgpr10_sgpr11
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr2_sgpr3, 56, 0 :: (dereferenceable invariant load (s64), addrspace 4)
- ; CHECK-NEXT: $sgpr4 = S_LSHL_B32 killed $sgpr22, 1, implicit-def dead $scc
- ; CHECK-NEXT: $sgpr22_sgpr23 = S_LOAD_DWORDX2_IMM killed $sgpr2_sgpr3, 36, 0 :: (dereferenceable invariant load (s64), addrspace 4)
- ; CHECK-NEXT: $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr1, killed $sgpr1, implicit $exec
- ; CHECK-NEXT: early-clobber $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr2_vgpr3, $vgpr72_vgpr73, 0, 0, 0, 0, implicit $mode, implicit $exec
- $sgpr1 = S_LOAD_DWORD_IMM $sgpr2_sgpr3, 56, 0 :: (dereferenceable invariant load (s64), addrspace 4)
- $vgpr2 = nsw V_MUL_LO_U32_e64 $vgpr1, $sgpr1, implicit $exec
- $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr2_vgpr3, $vgpr72_vgpr73, 0, 0, 0, 0, implicit $mode, implicit $exec
- $sgpr4 = S_LSHL_B32 $sgpr22, 1, implicit-def dead $scc
- $sgpr22_sgpr23 = S_LOAD_DWORDX2_IMM $sgpr2_sgpr3, 36, 0 :: (dereferenceable invariant load (s64), addrspace 4)
-...
-
-
More information about the llvm-commits
mailing list