[llvm] [AMDGPU][NPM] Port SIPreEmitPeephole to NPM (PR #130065)
Akshat Oke via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 2 04:03:10 PDT 2025
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/130065
From b561161a3b9c43fdd7b0aa15c1c09252053b5997 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Thu, 6 Mar 2025 06:20:13 +0000
Subject: [PATCH 1/2] [AMDGPU][NPM] Port SIPreEmitPeephole to NPM
---
llvm/lib/Target/AMDGPU/AMDGPU.h | 9 +++++-
llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 2 +-
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 7 ++---
llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 31 ++++++++++++++-----
.../AMDGPU/insert-handle-flat-vmem-ds.mir | 1 +
...ort-exec-branches-special-instructions.mir | 1 +
.../CodeGen/AMDGPU/set-gpr-idx-peephole.mir | 1 +
7 files changed, 38 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 278f10a670070..03cd45d7de6f2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -210,7 +210,7 @@ extern char &SIWholeQuadModeID;
void initializeSILowerControlFlowLegacyPass(PassRegistry &);
extern char &SILowerControlFlowLegacyID;
-void initializeSIPreEmitPeepholePass(PassRegistry &);
+void initializeSIPreEmitPeepholeLegacyPass(PassRegistry &);
extern char &SIPreEmitPeepholeID;
void initializeSILateBranchLoweringLegacyPass(PassRegistry &);
@@ -399,6 +399,13 @@ class SILateBranchLoweringPass
static bool isRequired() { return true; }
};
+class SIPreEmitPeepholePass : public PassInfoMixin<SIPreEmitPeepholePass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+ static bool isRequired() { return true; }
+};
+
class AMDGPUSetWavePriorityPass
: public PassInfoMixin<AMDGPUSetWavePriorityPass> {
public:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index bebb69d765654..538b1b181f643 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -127,6 +127,7 @@ MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPr
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
MACHINE_FUNCTION_PASS("si-post-ra-bundler", SIPostRABundlerPass())
MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
+MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
#undef MACHINE_FUNCTION_PASS
@@ -135,7 +136,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
-DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
// TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
// already exists.
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", AMDGPUPreloadKernArgPrologPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 4b5c70f09155f..d9ea5989a3b1a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -540,7 +540,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIModeRegisterLegacyPass(*PR);
initializeSIWholeQuadModeLegacyPass(*PR);
initializeSILowerControlFlowLegacyPass(*PR);
- initializeSIPreEmitPeepholePass(*PR);
+ initializeSIPreEmitPeepholeLegacyPass(*PR);
initializeSILateBranchLoweringLegacyPass(*PR);
initializeSIMemoryLegalizerLegacyPass(*PR);
initializeSIOptimizeExecMaskingLegacyPass(*PR);
@@ -2171,9 +2171,8 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less))
addPass(AMDGPUSetWavePriorityPass());
- if (TM.getOptLevel() > CodeGenOptLevel::None) {
- // TODO: addPass(SIPreEmitPeepholePass());
- }
+ if (TM.getOptLevel() > CodeGenOptLevel::None)
+ addPass(SIPreEmitPeepholePass());
// The hazard recognizer that runs as part of the post-ra scheduler does not
// guarantee to be able handle all hazards correctly. This is because if there
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 2bb70c138a50c..9db2118f2997b 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
namespace {
-class SIPreEmitPeephole : public MachineFunctionPass {
+class SIPreEmitPeephole {
private:
const SIInstrInfo *TII = nullptr;
const SIRegisterInfo *TRI = nullptr;
@@ -40,24 +40,31 @@ class SIPreEmitPeephole : public MachineFunctionPass {
const MachineBasicBlock &To) const;
bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
+public:
+ bool run(MachineFunction &MF);
+};
+
+class SIPreEmitPeepholeLegacy : public MachineFunctionPass {
public:
static char ID;
- SIPreEmitPeephole() : MachineFunctionPass(ID) {
- initializeSIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
+ SIPreEmitPeepholeLegacy() : MachineFunctionPass(ID) {
+ initializeSIPreEmitPeepholeLegacyPass(*PassRegistry::getPassRegistry());
}
- bool runOnMachineFunction(MachineFunction &MF) override;
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ return SIPreEmitPeephole().run(MF);
+ }
};
} // End anonymous namespace.
-INITIALIZE_PASS(SIPreEmitPeephole, DEBUG_TYPE,
+INITIALIZE_PASS(SIPreEmitPeepholeLegacy, DEBUG_TYPE,
"SI peephole optimizations", false, false)
-char SIPreEmitPeephole::ID = 0;
+char SIPreEmitPeepholeLegacy::ID = 0;
-char &llvm::SIPreEmitPeepholeID = SIPreEmitPeephole::ID;
+char &llvm::SIPreEmitPeepholeID = SIPreEmitPeepholeLegacy::ID;
bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
// Match:
@@ -410,7 +417,15 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
return true;
}
-bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
+PreservedAnalyses
+llvm::SIPreEmitPeepholePass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ if (!SIPreEmitPeephole().run(MF))
+ return PreservedAnalyses::all();
+ return getMachineFunctionPassPreservedAnalyses();
+}
+
+bool SIPreEmitPeephole::run(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
diff --git a/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir b/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir
index d89f306c96a36..2e8c8ca9c7a6c 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=polaris10 -run-pass si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=polaris10 -passes si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
---
diff --git a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir
index 20de119471ba3..92a9a195fc4c7 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir
+++ b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
# Make sure mandatory skips are not removed around mode defs.
---
diff --git a/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir b/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
index 796a70cfe8a39..1d0a6db36ea3b 100644
--- a/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
+++ b/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass si-pre-emit-peephole -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes si-pre-emit-peephole -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX
---
name: simple
From ce8d79bac342b45e3e6d0aeffc104b0338d31128 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Tue, 11 Mar 2025 09:43:22 +0000
Subject: [PATCH 2/2] AS
---
llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 1 +
llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir | 2 +-
.../AMDGPU/remove-short-exec-branches-special-instructions.mir | 2 +-
llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir | 2 +-
4 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 9db2118f2997b..2c2ceedf8a2f6 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -422,6 +422,7 @@ llvm::SIPreEmitPeepholePass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM) {
if (!SIPreEmitPeephole().run(MF))
return PreservedAnalyses::all();
+
return getMachineFunctionPassPreservedAnalyses();
}
diff --git a/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir b/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir
index 2e8c8ca9c7a6c..785f5bed97904 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=polaris10 -run-pass si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=polaris10 -passes si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=polaris10 -passes si-pre-emit-peephole %s -o - | FileCheck %s
---
diff --git a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir
index 92a9a195fc4c7..2c8739a87626e 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir
+++ b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-pre-emit-peephole %s -o - | FileCheck %s
# Make sure mandatory skips are not removed around mode defs.
---
diff --git a/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir b/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
index 1d0a6db36ea3b..002d43f937837 100644
--- a/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
+++ b/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass si-pre-emit-peephole -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes si-pre-emit-peephole -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes si-pre-emit-peephole -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX
---
name: simple
More information about the llvm-commits mailing list