[llvm] [AMDGPU][NewPM] Port AMDGPUInsertDelayAlu to NPM (PR #128003)
Akshat Oke via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 20 05:02:14 PST 2025
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/128003
>From b47124654ac91c2380fe448c8543af82aceba97a Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Thu, 20 Feb 2025 12:33:53 +0000
Subject: [PATCH] [AMDGPU][NewPM] Port AMDGPUInsertDelayAlu to NPM
---
llvm/lib/Target/AMDGPU/AMDGPU.h | 8 ++-
.../Target/AMDGPU/AMDGPUInsertDelayAlu.cpp | 52 ++++++++++++-------
llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 2 +-
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 42 ++++++++++++++-
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h | 1 +
.../AMDGPU/insert-delay-alu-literal.mir | 1 +
6 files changed, 85 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 42392e22643b2..1444115b98d65 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -270,6 +270,12 @@ struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> {
ScanOptions ScanImpl;
};
+struct AMDGPUInsertDelayAluPass
+ : public PassInfoMixin<AMDGPUInsertDelayAluPass> {
+ PreservedAnalyses run(MachineFunction &F,
+ MachineFunctionAnalysisManager &MFAM);
+};
+
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel);
ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
@@ -416,7 +422,7 @@ extern char &SIMemoryLegalizerID;
void initializeSIModeRegisterPass(PassRegistry&);
extern char &SIModeRegisterID;
-void initializeAMDGPUInsertDelayAluPass(PassRegistry &);
+void initializeAMDGPUInsertDelayAluLegacyPass(PassRegistry &);
extern char &AMDGPUInsertDelayAluID;
void initializeSIInsertHardClausesPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp
index 3f2bb5df8836b..b3e371cdff8fd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp
@@ -23,22 +23,13 @@ using namespace llvm;
namespace {
-class AMDGPUInsertDelayAlu : public MachineFunctionPass {
+class AMDGPUInsertDelayAlu {
public:
- static char ID;
-
const SIInstrInfo *SII;
const TargetRegisterInfo *TRI;
const TargetSchedModel *SchedModel;
- AMDGPUInsertDelayAlu() : MachineFunctionPass(ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
// Return true if MI waits for all outstanding VALU instructions to complete.
static bool instructionWaitsForVALU(const MachineInstr &MI) {
// These instruction types wait for VA_VDST==0 before issuing.
@@ -416,10 +407,7 @@ class AMDGPUInsertDelayAlu : public MachineFunctionPass {
return Changed;
}
- bool runOnMachineFunction(MachineFunction &MF) override {
- if (skipFunction(MF.getFunction()))
- return false;
-
+ bool run(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "AMDGPUInsertDelayAlu running on " << MF.getName()
<< "\n");
@@ -454,11 +442,39 @@ class AMDGPUInsertDelayAlu : public MachineFunctionPass {
}
};
+class AMDGPUInsertDelayAluLegacy : public MachineFunctionPass {
+public:
+ static char ID;
+
+ AMDGPUInsertDelayAluLegacy() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (skipFunction(MF.getFunction()))
+ return false;
+ AMDGPUInsertDelayAlu Impl;
+ return Impl.run(MF);
+ }
+};
} // namespace
-char AMDGPUInsertDelayAlu::ID = 0;
+PreservedAnalyses
+AMDGPUInsertDelayAluPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ if (!AMDGPUInsertDelayAlu().run(MF))
+ return PreservedAnalyses::all();
+ auto PA = getMachineFunctionPassPreservedAnalyses();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+} // end namespace llvm
+
+char AMDGPUInsertDelayAluLegacy::ID = 0;
-char &llvm::AMDGPUInsertDelayAluID = AMDGPUInsertDelayAlu::ID;
+char &llvm::AMDGPUInsertDelayAluID = AMDGPUInsertDelayAluLegacy::ID;
-INITIALIZE_PASS(AMDGPUInsertDelayAlu, DEBUG_TYPE, "AMDGPU Insert Delay ALU",
- false, false)
+INITIALIZE_PASS(AMDGPUInsertDelayAluLegacy, DEBUG_TYPE,
+ "AMDGPU Insert Delay ALU", false, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index fd1341e8c91b2..468fbff31233c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -96,6 +96,7 @@ FUNCTION_PASS_WITH_PARAMS(
#ifndef MACHINE_FUNCTION_PASS
#define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
#endif
+MACHINE_FUNCTION_PASS("amdgpu-insert-delay-alu", AMDGPUInsertDelayAluPass())
MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", GCNPreRALongBranchRegPass())
MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
@@ -120,7 +121,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
#undef MACHINE_FUNCTION_PASS
#define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
-DUMMY_MACHINE_FUNCTION_PASS("amdgpu-insert-delay-alu", AMDGPUInsertDelayAluPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 7c9377e61230b..f8431cbf05489 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -531,7 +531,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPURewriteUndefForPHILegacyPass(*PR);
initializeAMDGPUUnifyMetadataPass(*PR);
initializeSIAnnotateControlFlowLegacyPass(*PR);
- initializeAMDGPUInsertDelayAluPass(*PR);
+ initializeAMDGPUInsertDelayAluLegacyPass(*PR);
initializeSIInsertHardClausesPass(*PR);
initializeSIInsertWaitcntsPass(*PR);
initializeSIModeRegisterPass(*PR);
@@ -2145,6 +2145,46 @@ void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
Base::addPostRegAlloc(addPass);
}
+void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
+ if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less)) {
+ // TODO: addPass(GCNCreateVOPDPass());
+ }
+ // TODO: addPass(SIMemoryLegalizerPass());
+ // TODO: addPass(SIInsertWaitcntsPass());
+
+ // TODO: addPass(SIModeRegisterPass());
+
+ if (TM.getOptLevel() > CodeGenOptLevel::None) {
+ // TODO: addPass(SIInsertHardClausesPass());
+ }
+
+ // addPass(SILateBranchLoweringPass());
+ if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less)) {
+ // TODO: addPass(AMDGPUSetWavePriorityPass());
+ }
+
+ if (TM.getOptLevel() > CodeGenOptLevel::None) {
+ // TODO: addPass(SIPreEmitPeepholePass());
+ }
+
+ // The hazard recognizer that runs as part of the post-ra scheduler does not
+ // guarantee to be able handle all hazards correctly. This is because if there
+ // are multiple scheduling regions in a basic block, the regions are scheduled
+ // bottom up, so when we begin to schedule a region we don't know what
+ // instructions were emitted directly before it.
+ //
+ // Here we add a stand-alone hazard recognizer pass which can handle all
+ // cases.
+ // TODO: addPass(PostRAHazardRecognizerPass());
+ addPass(AMDGPUWaitSGPRHazardsPass());
+
+ if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less)) {
+ addPass(AMDGPUInsertDelayAluPass());
+ }
+
+ // TODO: addPass(BranchRelaxationPass());
+}
+
bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
CodeGenOptLevel Level) const {
if (Opt.getNumOccurrences())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index eb5a9ca1f86d6..3df4115324ac2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -180,6 +180,7 @@ class AMDGPUCodeGenPassBuilder
void addPreRewrite(AddMachinePass &) const;
void addMachineSSAOptimization(AddMachinePass &) const;
void addPostRegAlloc(AddMachinePass &) const;
+ void addPreEmitPass(AddMachinePass &) const;
/// Check if a pass is enabled given \p Opt option. The option always
/// overrides defaults if explicitly used. Otherwise its default will be used
diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir
index 266da50f6e543..ea8c7c956f776 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=amdgpu-insert-delay-alu %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -passes=amdgpu-insert-delay-alu %s -o - | FileCheck %s
---
name: valu_dep_1
More information about the llvm-commits
mailing list