[llvm-branch-commits] [llvm] [AMDGPU][NewPM] Port SIOptimizeExecMaskingPreRA to NPM (PR #125351)

Akshat Oke via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sat Feb 1 10:25:00 PST 2025


https://github.com/optimisan created https://github.com/llvm/llvm-project/pull/125351

None

>From 7ef9eb592f3d84763de4d9a59cd6f6ea737e82f0 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Sat, 1 Feb 2025 18:21:24 +0000
Subject: [PATCH] [AMDGPU][NewPM] Port SIOptimizeExecMaskingPreRA to NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h               |  2 +-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  2 +-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  3 +-
 .../AMDGPU/SIOptimizeExecMaskingPreRA.cpp     | 42 ++++++++++++++-----
 .../AMDGPU/SIOptimizeExecMaskingPreRA.h       | 24 +++++++++++
 .../CodeGen/AMDGPU/collapse-endcf-broken.mir  |  1 +
 ...ask-pre-ra-non-empty-but-used-interval.mir |  1 +
 7 files changed, 62 insertions(+), 13 deletions(-)
 create mode 100644 llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 31656c98ccd36fa..692eb937efa6743 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -368,7 +368,7 @@ struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
 };
 
-void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
+void initializeSIOptimizeExecMaskingPreRALegacyPass(PassRegistry &);
 extern char &SIOptimizeExecMaskingPreRAID;
 
 void initializeSIOptimizeVGPRLiveRangeLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 45e2f0d9097adfd..95d82ae407e6772 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -108,6 +108,7 @@ MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
 MACHINE_FUNCTION_PASS("si-lower-wwm-copies", SILowerWWMCopiesPass())
 MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass())
 MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
+MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPreRAPass())
 MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
 MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
 MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
@@ -127,7 +128,6 @@ DUMMY_MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-memory-legalizer", SIMemoryLegalizerPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-mode-register", SIModeRegisterPass())
-DUMMY_MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPreRAPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
 // already exists.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index d21f9c846c95130..27dc5a502d109b2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -46,6 +46,7 @@
 #include "SIMachineFunctionInfo.h"
 #include "SIMachineScheduler.h"
 #include "SIOptimizeExecMasking.h"
+#include "SIOptimizeExecMaskingPreRA.h"
 #include "SIOptimizeVGPRLiveRange.h"
 #include "SIPeepholeSDWA.h"
 #include "SIPreAllocateWWMRegs.h"
@@ -495,7 +496,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeSIFoldOperandsLegacyPass(*PR);
   initializeSIPeepholeSDWALegacyPass(*PR);
   initializeSIShrinkInstructionsLegacyPass(*PR);
-  initializeSIOptimizeExecMaskingPreRAPass(*PR);
+  initializeSIOptimizeExecMaskingPreRALegacyPass(*PR);
   initializeSIOptimizeVGPRLiveRangeLegacyPass(*PR);
   initializeSILoadStoreOptimizerLegacyPass(*PR);
   initializeAMDGPUCtorDtorLoweringLegacyPass(*PR);
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 31f65d82a4d2bcb..2a8a398d7429d05 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -12,6 +12,7 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "SIOptimizeExecMaskingPreRA.h"
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -25,7 +26,7 @@ using namespace llvm;
 
 namespace {
 
-class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
+class SIOptimizeExecMaskingPreRA {
 private:
   const SIRegisterInfo *TRI;
   const SIInstrInfo *TII;
@@ -42,11 +43,18 @@ class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
   bool optimizeVcndVcmpPair(MachineBasicBlock &MBB);
   bool optimizeElseBranch(MachineBasicBlock &MBB);
 
+public:
+  SIOptimizeExecMaskingPreRA(LiveIntervals *LIS) : LIS(LIS) {}
+  bool run(MachineFunction &MF);
+};
+
+class SIOptimizeExecMaskingPreRALegacy : public MachineFunctionPass {
 public:
   static char ID;
 
-  SIOptimizeExecMaskingPreRA() : MachineFunctionPass(ID) {
-    initializeSIOptimizeExecMaskingPreRAPass(*PassRegistry::getPassRegistry());
+  SIOptimizeExecMaskingPreRALegacy() : MachineFunctionPass(ID) {
+    initializeSIOptimizeExecMaskingPreRALegacyPass(
+        *PassRegistry::getPassRegistry());
   }
 
   bool runOnMachineFunction(MachineFunction &MF) override;
@@ -64,18 +72,18 @@ class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
 
 } // End anonymous namespace.
 
-INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRA, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRALegacy, DEBUG_TYPE,
                       "SI optimize exec mask operations pre-RA", false, false)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
-INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRA, DEBUG_TYPE,
+INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRALegacy, DEBUG_TYPE,
                     "SI optimize exec mask operations pre-RA", false, false)
 
-char SIOptimizeExecMaskingPreRA::ID = 0;
+char SIOptimizeExecMaskingPreRALegacy::ID = 0;
 
-char &llvm::SIOptimizeExecMaskingPreRAID = SIOptimizeExecMaskingPreRA::ID;
+char &llvm::SIOptimizeExecMaskingPreRAID = SIOptimizeExecMaskingPreRALegacy::ID;
 
 FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
-  return new SIOptimizeExecMaskingPreRA();
+  return new SIOptimizeExecMaskingPreRALegacy();
 }
 
 // See if there is a def between \p AndIdx and \p SelIdx that needs to live
@@ -340,15 +348,29 @@ bool SIOptimizeExecMaskingPreRA::optimizeElseBranch(MachineBasicBlock &MBB) {
   return true;
 }
 
-bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
+PreservedAnalyses
+SIOptimizeExecMaskingPreRAPass::run(MachineFunction &MF,
+                                    MachineFunctionAnalysisManager &MFAM) {
+  auto &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
+  SIOptimizeExecMaskingPreRA(&LIS).run(MF);
+  return PreservedAnalyses::all();
+}
+
+bool SIOptimizeExecMaskingPreRALegacy::runOnMachineFunction(
+    MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
 
+  auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
+  return SIOptimizeExecMaskingPreRA(LIS).run(MF);
+}
+
+bool SIOptimizeExecMaskingPreRA::run(MachineFunction &MF) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   TRI = ST.getRegisterInfo();
   TII = ST.getInstrInfo();
   MRI = &MF.getRegInfo();
-  LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
+  // LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
 
   const bool Wave32 = ST.isWave32();
   AndOpc = Wave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h
new file mode 100644
index 000000000000000..eca79c0d8dbfbb2
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h
@@ -0,0 +1,24 @@
+//===- SIOptimizeExecMaskingPreRA.h.h ---------------------------------------*-
+//C++- *-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H
+#define LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class SIOptimizeExecMaskingPreRAPass
+    : public PassInfoMixin<SIOptimizeExecMaskingPreRAPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir
index 7aea97a3053c7d5..2eb1f5d559651b5 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GXN %s
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -passes=si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GXN %s
 
 # FIXME: This is a miscompile, and the s_or_b64s need to be preserved.
 
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir
index 63ee27e0f83ba6e..9673186e5ae3f64 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir
+++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=si-optimize-exec-masking-pre-ra,greedy -verify-machineinstrs -o - %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=si-optimize-exec-masking-pre-ra,greedy -verify-machineinstrs -o - %s
 
 # This sample can trigger a "Non-empty but used interval" assert in regalloc if
 # SIOptimizeExecMaskingPreRA does not update live intervals correctly.



More information about the llvm-branch-commits mailing list