[llvm] 8529238 - [AMDGPU][NewPM] Port AMDGPUInsertDelayAlu to NPM (#128003)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 25 20:20:13 PST 2025
Author: Akshat Oke
Date: 2025-02-26T09:50:09+05:30
New Revision: 852923822fd085d304988c24f9b02edebe5e7903
URL: https://github.com/llvm/llvm-project/commit/852923822fd085d304988c24f9b02edebe5e7903
DIFF: https://github.com/llvm/llvm-project/commit/852923822fd085d304988c24f9b02edebe5e7903.diff
LOG: [AMDGPU][NewPM] Port AMDGPUInsertDelayAlu to NPM (#128003)
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPU.h
llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp
llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 428355a739628..1f2431fa34e9a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -270,6 +270,12 @@ struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> {
ScanOptions ScanImpl;
};
+struct AMDGPUInsertDelayAluPass
+ : public PassInfoMixin<AMDGPUInsertDelayAluPass> {
+ PreservedAnalyses run(MachineFunction &F,
+ MachineFunctionAnalysisManager &MFAM);
+}; // run() is defined in AMDGPUInsertDelayAlu.cpp (parameter is named MF there).
+
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel);
ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
@@ -416,7 +422,7 @@ extern char &SIMemoryLegalizerID;
void initializeSIModeRegisterPass(PassRegistry&);
extern char &SIModeRegisterID;
-void initializeAMDGPUInsertDelayAluPass(PassRegistry &);
+void initializeAMDGPUInsertDelayAluLegacyPass(PassRegistry &);
extern char &AMDGPUInsertDelayAluID;
void initializeSIInsertHardClausesPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp
index 3f2bb5df8836b..b3e371cdff8fd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp
@@ -23,22 +23,13 @@ using namespace llvm;
namespace {
-class AMDGPUInsertDelayAlu : public MachineFunctionPass {
+class AMDGPUInsertDelayAlu {
public:
- static char ID;
-
const SIInstrInfo *SII;
const TargetRegisterInfo *TRI;
const TargetSchedModel *SchedModel;
- AMDGPUInsertDelayAlu() : MachineFunctionPass(ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
// Return true if MI waits for all outstanding VALU instructions to complete.
static bool instructionWaitsForVALU(const MachineInstr &MI) {
// These instruction types wait for VA_VDST==0 before issuing.
@@ -416,10 +407,7 @@ class AMDGPUInsertDelayAlu : public MachineFunctionPass {
return Changed;
}
- bool runOnMachineFunction(MachineFunction &MF) override {
- if (skipFunction(MF.getFunction()))
- return false;
-
+ bool run(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "AMDGPUInsertDelayAlu running on " << MF.getName()
<< "\n");
@@ -454,11 +442,39 @@ class AMDGPUInsertDelayAlu : public MachineFunctionPass {
}
};
+class AMDGPUInsertDelayAluLegacy : public MachineFunctionPass { // legacy-PM wrapper
+public:
+ static char ID;
+
+ AMDGPUInsertDelayAluLegacy() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (skipFunction(MF.getFunction())) // skip check lives here, not in the shared run()
+ return false;
+ AMDGPUInsertDelayAlu Impl; // same implementation AMDGPUInsertDelayAluPass::run uses
+ return Impl.run(MF);
+ }
+};
} // namespace
-char AMDGPUInsertDelayAlu::ID = 0;
+PreservedAnalyses
+AMDGPUInsertDelayAluPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ if (!AMDGPUInsertDelayAlu().run(MF))
+ return PreservedAnalyses::all();
+ auto PA = getMachineFunctionPassPreservedAnalyses();
+ PA.preserveSet<CFGAnalyses>(); // mirrors setPreservesCFG() in the legacy wrapper
+ return PA;
+} // end AMDGPUInsertDelayAluPass::run
+
+char AMDGPUInsertDelayAluLegacy::ID = 0;
-char &llvm::AMDGPUInsertDelayAluID = AMDGPUInsertDelayAlu::ID;
+char &llvm::AMDGPUInsertDelayAluID = AMDGPUInsertDelayAluLegacy::ID;
-INITIALIZE_PASS(AMDGPUInsertDelayAlu, DEBUG_TYPE, "AMDGPU Insert Delay ALU",
- false, false)
+INITIALIZE_PASS(AMDGPUInsertDelayAluLegacy, DEBUG_TYPE,
+ "AMDGPU Insert Delay ALU", false, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index e89d84c8a105f..64e07e4041cd3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -96,6 +96,7 @@ FUNCTION_PASS_WITH_PARAMS(
#ifndef MACHINE_FUNCTION_PASS
#define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
#endif
+MACHINE_FUNCTION_PASS("amdgpu-insert-delay-alu", AMDGPUInsertDelayAluPass())
MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", GCNPreRALongBranchRegPass())
MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
@@ -122,7 +123,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
#undef MACHINE_FUNCTION_PASS
#define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
-DUMMY_MACHINE_FUNCTION_PASS("amdgpu-insert-delay-alu", AMDGPUInsertDelayAluPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 828c1702ae07a..7bf6e8f671db8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -533,7 +533,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPURewriteUndefForPHILegacyPass(*PR);
initializeAMDGPUUnifyMetadataPass(*PR);
initializeSIAnnotateControlFlowLegacyPass(*PR);
- initializeAMDGPUInsertDelayAluPass(*PR);
+ initializeAMDGPUInsertDelayAluLegacyPass(*PR);
initializeSIInsertHardClausesPass(*PR);
initializeSIInsertWaitcntsPass(*PR);
initializeSIModeRegisterPass(*PR);
@@ -2147,6 +2147,46 @@ void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
Base::addPostRegAlloc(addPass);
}
+void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
+ if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less)) {
+ // TODO: addPass(GCNCreateVOPDPass());
+ }
+ // TODO: addPass(SIMemoryLegalizerPass());
+ // TODO: addPass(SIInsertWaitcntsPass());
+
+ // TODO: addPass(SIModeRegisterPass());
+
+ if (TM.getOptLevel() > CodeGenOptLevel::None) {
+ // TODO: addPass(SIInsertHardClausesPass());
+ }
+
+ // TODO: addPass(SILateBranchLoweringPass());
+ if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less)) {
+ // TODO: addPass(AMDGPUSetWavePriorityPass());
+ }
+
+ if (TM.getOptLevel() > CodeGenOptLevel::None) {
+ // TODO: addPass(SIPreEmitPeepholePass());
+ }
+
+ // The hazard recognizer that runs as part of the post-ra scheduler is not
+ // guaranteed to handle all hazards correctly. This is because if there are
+ // multiple scheduling regions in a basic block, the regions are scheduled
+ // bottom up, so when we begin to schedule a region we don't know what
+ // instructions were emitted directly before it.
+ //
+ // Here we add a stand-alone hazard recognizer pass which can handle all
+ // cases.
+ // TODO: addPass(PostRAHazardRecognizerPass());
+ addPass(AMDGPUWaitSGPRHazardsPass());
+
+ if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less)) {
+ addPass(AMDGPUInsertDelayAluPass());
+ }
+
+ // TODO: addPass(BranchRelaxationPass());
+}
+
bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
CodeGenOptLevel Level) const {
if (Opt.getNumOccurrences())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index eb5a9ca1f86d6..3df4115324ac2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -180,6 +180,7 @@ class AMDGPUCodeGenPassBuilder
void addPreRewrite(AddMachinePass &) const;
void addMachineSSAOptimization(AddMachinePass &) const;
void addPostRegAlloc(AddMachinePass &) const;
+ void addPreEmitPass(AddMachinePass &) const;
/// Check if a pass is enabled given \p Opt option. The option always
/// overrides defaults if explicitly used. Otherwise its default will be used
diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir
index 266da50f6e543..ea8c7c956f776 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=amdgpu-insert-delay-alu %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -passes=amdgpu-insert-delay-alu %s -o - | FileCheck %s
---
name: valu_dep_1
More information about the llvm-commits mailing list