[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SILateBranchLowering to NPM (PR #130063)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Mar 9 22:11:31 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Akshat Oke (optimisan)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/130063.diff
6 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+9-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-2)
- (modified) llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp (+30-10)
- (modified) llvm/test/CodeGen/AMDGPU/early-term.mir (+2)
- (modified) llvm/test/CodeGen/AMDGPU/readlane_exec0.mir (+1)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index b434676f85581..d1dc62e9cc526 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -213,7 +213,7 @@ extern char &SILowerControlFlowLegacyID;
void initializeSIPreEmitPeepholePass(PassRegistry &);
extern char &SIPreEmitPeepholeID;
-void initializeSILateBranchLoweringPass(PassRegistry &);
+void initializeSILateBranchLoweringLegacyPass(PassRegistry &);
extern char &SILateBranchLoweringPassID;
void initializeSIOptimizeExecMaskingLegacyPass(PassRegistry &);
@@ -384,6 +384,14 @@ class SIInsertHardClausesPass : public PassInfoMixin<SIInsertHardClausesPass> {
MachineFunctionAnalysisManager &MFAM);
};
+class SILateBranchLoweringPass
+ : public PassInfoMixin<SILateBranchLoweringPass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+ static bool isRequired() { return true; }
+};
+
FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
ModulePass *createAMDGPUPrintfRuntimeBinding();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 3eabe087a8a33..318aad5590cda 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -111,6 +111,7 @@ MACHINE_FUNCTION_PASS("si-form-memory-clauses", SIFormMemoryClausesPass())
MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
MACHINE_FUNCTION_PASS("si-insert-hard-clauses", SIInsertHardClausesPass())
MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
+MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass())
MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
@@ -132,7 +133,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizations
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())
-DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass())
DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
// TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
// already exists.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 6c24fe5f1441a..b9d62cc9e4b63 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -540,7 +540,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIWholeQuadModeLegacyPass(*PR);
initializeSILowerControlFlowLegacyPass(*PR);
initializeSIPreEmitPeepholePass(*PR);
- initializeSILateBranchLoweringPass(*PR);
+ initializeSILateBranchLoweringLegacyPass(*PR);
initializeSIMemoryLegalizerLegacyPass(*PR);
initializeSIOptimizeExecMaskingLegacyPass(*PR);
initializeSIPreAllocateWWMRegsLegacyPass(*PR);
@@ -2161,7 +2161,8 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
// TODO: addPass(SIInsertHardClausesPass());
}
- // addPass(SILateBranchLoweringPass());
+ addPass(SILateBranchLoweringPass());
+
if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less)) {
// TODO: addPass(AMDGPUSetWavePriorityPass());
}
diff --git a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
index d02173f57ee37..0f5b6bd9374b0 100644
--- a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
@@ -16,6 +16,7 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachinePassManager.h"
using namespace llvm;
@@ -23,7 +24,7 @@ using namespace llvm;
namespace {
-class SILateBranchLowering : public MachineFunctionPass {
+class SILateBranchLowering {
private:
const SIRegisterInfo *TRI = nullptr;
const SIInstrInfo *TII = nullptr;
@@ -33,14 +34,23 @@ class SILateBranchLowering : public MachineFunctionPass {
void earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock);
public:
- static char ID;
+ SILateBranchLowering(MachineDominatorTree *MDT) : MDT(MDT) {}
+
+ bool run(MachineFunction &MF);
unsigned MovOpc;
Register ExecReg;
+};
- SILateBranchLowering() : MachineFunctionPass(ID) {}
+class SILateBranchLoweringLegacy : public MachineFunctionPass {
+public:
+ static char ID;
+ SILateBranchLoweringLegacy() : MachineFunctionPass(ID) {}
- bool runOnMachineFunction(MachineFunction &MF) override;
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ auto *MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+ return SILateBranchLowering(MDT).run(MF);
+ }
StringRef getPassName() const override {
return "SI Final Branch Preparation";
@@ -55,15 +65,15 @@ class SILateBranchLowering : public MachineFunctionPass {
} // end anonymous namespace
-char SILateBranchLowering::ID = 0;
+char SILateBranchLoweringLegacy::ID = 0;
-INITIALIZE_PASS_BEGIN(SILateBranchLowering, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(SILateBranchLoweringLegacy, DEBUG_TYPE,
"SI insert s_cbranch_execz instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
-INITIALIZE_PASS_END(SILateBranchLowering, DEBUG_TYPE,
+INITIALIZE_PASS_END(SILateBranchLoweringLegacy, DEBUG_TYPE,
"SI insert s_cbranch_execz instructions", false, false)
-char &llvm::SILateBranchLoweringPassID = SILateBranchLowering::ID;
+char &llvm::SILateBranchLoweringPassID = SILateBranchLoweringLegacy::ID;
static void generateEndPgm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
@@ -144,11 +154,21 @@ void SILateBranchLowering::earlyTerm(MachineInstr &MI,
MDT->insertEdge(&MBB, EarlyExitBlock);
}
-bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) {
+PreservedAnalyses
+llvm::SILateBranchLoweringPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ auto *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
+ if (!SILateBranchLowering(MDT).run(MF))
+ return PreservedAnalyses::all();
+
+ return getMachineFunctionPassPreservedAnalyses()
+ .preserve<MachineDominatorTreeAnalysis>();
+}
+
+bool SILateBranchLowering::run(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
- MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
diff --git a/llvm/test/CodeGen/AMDGPU/early-term.mir b/llvm/test/CodeGen/AMDGPU/early-term.mir
index 77bc9729ee845..3d75d405a46d3 100644
--- a/llvm/test/CodeGen/AMDGPU/early-term.mir
+++ b/llvm/test/CodeGen/AMDGPU/early-term.mir
@@ -2,6 +2,8 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX10 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX11 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -passes=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX11 %s
+
--- |
define amdgpu_ps void @early_term_scc0_end_block() {
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir b/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir
index 6a286eafa6d58..a4c05aa781df7 100644
--- a/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir
+++ b/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir
@@ -1,4 +1,5 @@
# RUN: llc -o - %s -mtriple=amdgcn -mcpu=fiji -run-pass=si-late-branch-lowering -verify-machineinstrs | FileCheck -check-prefix=GCN %s
+# RUN: llc -o - %s -mtriple=amdgcn -mcpu=fiji -passes=si-late-branch-lowering -verify-machineinstrs | FileCheck -check-prefix=GCN %s
# GCN-LABEL: readlane_exec0
# GCN: bb.0
``````````
</details>
https://github.com/llvm/llvm-project/pull/130063
More information about the llvm-branch-commits
mailing list