[llvm] [AMDGPU][NPM] Port SIPreEmitPeephole to NPM (PR #130065)

Akshat Oke via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 2 04:03:10 PDT 2025


https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/130065

>From b561161a3b9c43fdd7b0aa15c1c09252053b5997 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Thu, 6 Mar 2025 06:20:13 +0000
Subject: [PATCH 1/2] [AMDGPU][NPM] Port SIPreEmitPeephole to NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h               |  9 +++++-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  2 +-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  7 ++---
 llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp  | 31 ++++++++++++++-----
 .../AMDGPU/insert-handle-flat-vmem-ds.mir     |  1 +
 ...ort-exec-branches-special-instructions.mir |  1 +
 .../CodeGen/AMDGPU/set-gpr-idx-peephole.mir   |  1 +
 7 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 278f10a670070..03cd45d7de6f2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -210,7 +210,7 @@ extern char &SIWholeQuadModeID;
 void initializeSILowerControlFlowLegacyPass(PassRegistry &);
 extern char &SILowerControlFlowLegacyID;
 
-void initializeSIPreEmitPeepholePass(PassRegistry &);
+void initializeSIPreEmitPeepholeLegacyPass(PassRegistry &);
 extern char &SIPreEmitPeepholeID;
 
 void initializeSILateBranchLoweringLegacyPass(PassRegistry &);
@@ -399,6 +399,13 @@ class SILateBranchLoweringPass
   static bool isRequired() { return true; }
 };
 
+class SIPreEmitPeepholePass : public PassInfoMixin<SIPreEmitPeepholePass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
 class AMDGPUSetWavePriorityPass
     : public PassInfoMixin<AMDGPUSetWavePriorityPass> {
 public:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index bebb69d765654..538b1b181f643 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -127,6 +127,7 @@ MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPr
 MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
 MACHINE_FUNCTION_PASS("si-post-ra-bundler", SIPostRABundlerPass())
 MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
+MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
 MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
 #undef MACHINE_FUNCTION_PASS
@@ -135,7 +136,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
 
-DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
 // already exists.
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", AMDGPUPreloadKernArgPrologPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 4b5c70f09155f..d9ea5989a3b1a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -540,7 +540,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeSIModeRegisterLegacyPass(*PR);
   initializeSIWholeQuadModeLegacyPass(*PR);
   initializeSILowerControlFlowLegacyPass(*PR);
-  initializeSIPreEmitPeepholePass(*PR);
+  initializeSIPreEmitPeepholeLegacyPass(*PR);
   initializeSILateBranchLoweringLegacyPass(*PR);
   initializeSIMemoryLegalizerLegacyPass(*PR);
   initializeSIOptimizeExecMaskingLegacyPass(*PR);
@@ -2171,9 +2171,8 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
   if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less))
     addPass(AMDGPUSetWavePriorityPass());
 
-  if (TM.getOptLevel() > CodeGenOptLevel::None) {
-    // TODO: addPass(SIPreEmitPeepholePass());
-  }
+  if (TM.getOptLevel() > CodeGenOptLevel::None)
+    addPass(SIPreEmitPeepholePass());
 
   // The hazard recognizer that runs as part of the post-ra scheduler does not
   // guarantee to be able handle all hazards correctly. This is because if there
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 2bb70c138a50c..9db2118f2997b 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
 
 namespace {
 
-class SIPreEmitPeephole : public MachineFunctionPass {
+class SIPreEmitPeephole {
 private:
   const SIInstrInfo *TII = nullptr;
   const SIRegisterInfo *TRI = nullptr;
@@ -40,24 +40,31 @@ class SIPreEmitPeephole : public MachineFunctionPass {
                              const MachineBasicBlock &To) const;
   bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
 
+public:
+  bool run(MachineFunction &MF);
+};
+
+class SIPreEmitPeepholeLegacy : public MachineFunctionPass {
 public:
   static char ID;
 
-  SIPreEmitPeephole() : MachineFunctionPass(ID) {
-    initializeSIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
+  SIPreEmitPeepholeLegacy() : MachineFunctionPass(ID) {
+    initializeSIPreEmitPeepholeLegacyPass(*PassRegistry::getPassRegistry());
   }
 
-  bool runOnMachineFunction(MachineFunction &MF) override;
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    return SIPreEmitPeephole().run(MF);
+  }
 };
 
 } // End anonymous namespace.
 
-INITIALIZE_PASS(SIPreEmitPeephole, DEBUG_TYPE,
+INITIALIZE_PASS(SIPreEmitPeepholeLegacy, DEBUG_TYPE,
                 "SI peephole optimizations", false, false)
 
-char SIPreEmitPeephole::ID = 0;
+char SIPreEmitPeepholeLegacy::ID = 0;
 
-char &llvm::SIPreEmitPeepholeID = SIPreEmitPeephole::ID;
+char &llvm::SIPreEmitPeepholeID = SIPreEmitPeepholeLegacy::ID;
 
 bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
   // Match:
@@ -410,7 +417,15 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
   return true;
 }
 
-bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
+PreservedAnalyses
+llvm::SIPreEmitPeepholePass::run(MachineFunction &MF,
+                                 MachineFunctionAnalysisManager &MFAM) {
+  if (!SIPreEmitPeephole().run(MF))
+    return PreservedAnalyses::all();
+  return getMachineFunctionPassPreservedAnalyses();
+}
+
+bool SIPreEmitPeephole::run(MachineFunction &MF) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   TII = ST.getInstrInfo();
   TRI = &TII->getRegisterInfo();
diff --git a/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir b/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir
index d89f306c96a36..2e8c8ca9c7a6c 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=polaris10 -run-pass si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=polaris10 -passes si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
 
 ---
 
diff --git a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir
index 20de119471ba3..92a9a195fc4c7 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir
+++ b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=si-pre-emit-peephole -verify-machineinstrs  %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-pre-emit-peephole -verify-machineinstrs  %s -o - | FileCheck %s
 # Make sure mandatory skips are not removed around mode defs.
 
 ---
diff --git a/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir b/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
index 796a70cfe8a39..1d0a6db36ea3b 100644
--- a/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
+++ b/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass si-pre-emit-peephole -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes si-pre-emit-peephole -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX
 
 ---
 name:            simple

>From ce8d79bac342b45e3e6d0aeffc104b0338d31128 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Tue, 11 Mar 2025 09:43:22 +0000
Subject: [PATCH 2/2] AS

---
 llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp                    | 1 +
 llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir         | 2 +-
 .../AMDGPU/remove-short-exec-branches-special-instructions.mir  | 2 +-
 llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir               | 2 +-
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 9db2118f2997b..2c2ceedf8a2f6 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -422,6 +422,7 @@ llvm::SIPreEmitPeepholePass::run(MachineFunction &MF,
                                  MachineFunctionAnalysisManager &MFAM) {
   if (!SIPreEmitPeephole().run(MF))
     return PreservedAnalyses::all();
+
   return getMachineFunctionPassPreservedAnalyses();
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir b/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir
index 2e8c8ca9c7a6c..785f5bed97904 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir
@@ -1,6 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=polaris10 -run-pass si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=polaris10 -passes si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=polaris10 -passes si-pre-emit-peephole %s -o - | FileCheck %s
 
 ---
 
diff --git a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir
index 92a9a195fc4c7..2c8739a87626e 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir
+++ b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir
@@ -1,6 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=si-pre-emit-peephole -verify-machineinstrs  %s -o - | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-pre-emit-peephole -verify-machineinstrs  %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-pre-emit-peephole %s -o - | FileCheck %s
 # Make sure mandatory skips are not removed around mode defs.
 
 ---
diff --git a/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir b/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
index 1d0a6db36ea3b..002d43f937837 100644
--- a/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
+++ b/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
@@ -1,6 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass si-pre-emit-peephole -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes si-pre-emit-peephole -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes si-pre-emit-peephole -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX
 
 ---
 name:            simple



More information about the llvm-commits mailing list