[llvm-branch-commits] [llvm] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM (PR #109939)

Akshat Oke via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Oct 7 03:28:47 PDT 2024


https://github.com/Akshat-Oke updated https://github.com/llvm/llvm-project/pull/109939

>From 786fb970b7b1d12a6c6c6888d2b5cfe51363287d Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Tue, 24 Sep 2024 11:41:18 +0000
Subject: [PATCH 1/2] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h               |  6 +-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  1 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  7 ++-
 .../Target/AMDGPU/SIPreAllocateWWMRegs.cpp    | 60 ++++++++++++-------
 llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h | 25 ++++++++
 .../AMDGPU/si-pre-allocate-wwm-regs.mir       | 20 +++++++
 6 files changed, 92 insertions(+), 27 deletions(-)
 create mode 100644 llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 342d55e828bca5..95d0ad0f9dc96a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -49,7 +49,7 @@ FunctionPass *createSIFixSGPRCopiesLegacyPass();
 FunctionPass *createLowerWWMCopiesPass();
 FunctionPass *createSIMemoryLegalizerPass();
 FunctionPass *createSIInsertWaitcntsPass();
-FunctionPass *createSIPreAllocateWWMRegsPass();
+FunctionPass *createSIPreAllocateWWMRegsLegacyPass();
 FunctionPass *createSIFormMemoryClausesPass();
 
 FunctionPass *createSIPostRABundlerPass();
@@ -212,8 +212,8 @@ extern char &SILateBranchLoweringPassID;
 void initializeSIOptimizeExecMaskingPass(PassRegistry &);
 extern char &SIOptimizeExecMaskingID;
 
-void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
-extern char &SIPreAllocateWWMRegsID;
+void initializeSIPreAllocateWWMRegsLegacyPass(PassRegistry &);
+extern char &SIPreAllocateWWMRegsLegacyID;
 
 void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &);
 extern char &AMDGPUImageIntrinsicOptimizerID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 0ebf34c901c142..174a90f0aa419d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -102,5 +102,6 @@ MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
 MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
 MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
 MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
+MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
 MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
 #undef MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 1f2148c2922de9..dc5330740f4a6b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -41,6 +41,7 @@
 #include "SIMachineFunctionInfo.h"
 #include "SIMachineScheduler.h"
 #include "SIPeepholeSDWA.h"
+#include "SIPreAllocateWWMRegs.h"
 #include "SIShrinkInstructions.h"
 #include "TargetInfo/AMDGPUTargetInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
@@ -506,7 +507,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeSILateBranchLoweringPass(*PR);
   initializeSIMemoryLegalizerPass(*PR);
   initializeSIOptimizeExecMaskingPass(*PR);
-  initializeSIPreAllocateWWMRegsPass(*PR);
+  initializeSIPreAllocateWWMRegsLegacyPass(*PR);
   initializeSIFormMemoryClausesPass(*PR);
   initializeSIPostRABundlerPass(*PR);
   initializeGCNCreateVOPDPass(*PR);
@@ -1505,7 +1506,7 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
   addPass(&SILowerSGPRSpillsLegacyID);
 
   // To Allocate wwm registers used in whole quad mode operations (for shaders).
-  addPass(&SIPreAllocateWWMRegsID);
+  addPass(&SIPreAllocateWWMRegsLegacyID);
 
   // For allocating other wwm register operands.
   addPass(createWWMRegAllocPass(false));
@@ -1537,7 +1538,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
   addPass(&SILowerSGPRSpillsLegacyID);
 
   // To Allocate wwm registers used in whole quad mode operations (for shaders).
-  addPass(&SIPreAllocateWWMRegsID);
+  addPass(&SIPreAllocateWWMRegsLegacyID);
 
   // For allocating other whole wave mode registers.
   addPass(createWWMRegAllocPass(true));
diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
index 07303e2aa726c5..f9109c01c8085b 100644
--- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "SIPreAllocateWWMRegs.h"
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -34,7 +35,7 @@ static cl::opt<bool>
 
 namespace {
 
-class SIPreAllocateWWMRegs : public MachineFunctionPass {
+class SIPreAllocateWWMRegs {
 private:
   const SIInstrInfo *TII;
   const SIRegisterInfo *TRI;
@@ -48,13 +49,21 @@ class SIPreAllocateWWMRegs : public MachineFunctionPass {
 #ifndef NDEBUG
   void printWWMInfo(const MachineInstr &MI);
 #endif
+  bool processDef(MachineOperand &MO);
+  void rewriteRegs(MachineFunction &MF);
+
+public:
+  SIPreAllocateWWMRegs(LiveIntervals *LIS, LiveRegMatrix *Matrix,
+                       VirtRegMap *VRM)
+      : LIS(LIS), Matrix(Matrix), VRM(VRM) {}
+  bool run(MachineFunction &MF);
+};
 
+class SIPreAllocateWWMRegsLegacy : public MachineFunctionPass {
 public:
   static char ID;
 
-  SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
-    initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
-  }
+  SIPreAllocateWWMRegsLegacy() : MachineFunctionPass(ID) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
@@ -65,28 +74,24 @@ class SIPreAllocateWWMRegs : public MachineFunctionPass {
     AU.setPreservesAll();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
-
-private:
-  bool processDef(MachineOperand &MO);
-  void rewriteRegs(MachineFunction &MF);
 };
 
 } // End anonymous namespace.
 
-INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
-                "SI Pre-allocate WWM Registers", false, false)
+INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE,
+                      "SI Pre-allocate WWM Registers", false, false)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
-INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
-                "SI Pre-allocate WWM Registers", false, false)
+INITIALIZE_PASS_END(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE,
+                    "SI Pre-allocate WWM Registers", false, false)
 
-char SIPreAllocateWWMRegs::ID = 0;
+char SIPreAllocateWWMRegsLegacy::ID = 0;
 
-char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
+char &llvm::SIPreAllocateWWMRegsLegacyID = SIPreAllocateWWMRegsLegacy::ID;
 
-FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
-  return new SIPreAllocateWWMRegs();
+FunctionPass *llvm::createSIPreAllocateWWMRegsLegacyPass() {
+  return new SIPreAllocateWWMRegsLegacy();
 }
 
 bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
@@ -184,7 +189,14 @@ SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) {
 
 #endif
 
-bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
+bool SIPreAllocateWWMRegsLegacy::runOnMachineFunction(MachineFunction &MF) {
+  auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
+  auto *Matrix = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
+  auto *VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
+  return SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF);
+}
+
+bool SIPreAllocateWWMRegs::run(MachineFunction &MF) {
   LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
 
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -193,10 +205,6 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
   TRI = &TII->getRegisterInfo();
   MRI = &MF.getRegInfo();
 
-  LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
-  Matrix = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
-  VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
-
   RegClassInfo.runOnMachineFunction(MF);
 
   bool PreallocateSGPRSpillVGPRs =
@@ -254,3 +262,13 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
   rewriteRegs(MF);
   return true;
 }
+
+PreservedAnalyses
+SIPreAllocateWWMRegsPass::run(MachineFunction &MF,
+                              MachineFunctionAnalysisManager &MFAM) {
+  auto *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(MF);
+  auto *Matrix = &MFAM.getResult<LiveRegMatrixAnalysis>(MF);
+  auto *VRM = &MFAM.getResult<VirtRegMapAnalysis>(MF);
+  SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF);
+  return PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h
new file mode 100644
index 00000000000000..a0acde3afa77ce
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h
@@ -0,0 +1,25 @@
+//===--- SIPreAllocateWWMRegs.h -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H
+#define LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class SIPreAllocateWWMRegsPass
+    : public PassInfoMixin<SIPreAllocateWWMRegsPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H
diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
index 4dcad87a985c0b..a8b97864300aba 100644
--- a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
@@ -2,6 +2,9 @@
 # RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s  | FileCheck %s
 # RUN: llc -mtriple=amdgcn -verify-machineinstrs -amdgpu-prealloc-sgpr-spill-vgprs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s --check-prefix=CHECK2
 
+# RUN: llc -mtriple=amdgcn -passes=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s  | FileCheck %s
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -amdgpu-prealloc-sgpr-spill-vgprs -passes=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s --check-prefix=CHECK2
+
 ---
 
 name: pre_allocate_wwm_regs_strict
@@ -18,6 +21,16 @@ body: |
     ; CHECK-NEXT: dead $vgpr0 = V_MOV_B32_dpp $vgpr0, [[DEF]], 323, 12, 15, 0, implicit $exec
     ; CHECK-NEXT: $exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5
     ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
+    ;
+    ; CHECK2-LABEL: name: pre_allocate_wwm_regs_strict
+    ; CHECK2: liveins: $sgpr1
+    ; CHECK2-NEXT: {{  $}}
+    ; CHECK2-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; CHECK2-NEXT: renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
+    ; CHECK2-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK2-NEXT: dead $vgpr0 = V_MOV_B32_dpp $vgpr0, [[DEF]], 323, 12, 15, 0, implicit $exec
+    ; CHECK2-NEXT: $exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5
+    ; CHECK2-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
     %0:vgpr_32 = IMPLICIT_DEF
     renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
     %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
@@ -32,6 +45,13 @@ tracksRegLiveness: true
 body: |
   bb.0:
     liveins: $sgpr1
+    ; CHECK-LABEL: name: pre_allocate_wwm_spill_to_vgpr
+    ; CHECK: liveins: $sgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; CHECK-NEXT: dead [[SI_SPILL_S32_TO_VGPR:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, [[DEF]]
+    ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
+    ;
     ; CHECK2-LABEL: name: pre_allocate_wwm_spill_to_vgpr
     ; CHECK2: liveins: $sgpr1
     ; CHECK2-NEXT: {{  $}}

>From 02fafddfb120352a2c6fc5bbc2820fab1693a74c Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Mon, 7 Oct 2024 09:23:15 +0000
Subject: [PATCH 2/2] C++ mode

---
 llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h
index a0acde3afa77ce..99648176491687 100644
--- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h
+++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h
@@ -1,4 +1,4 @@
-//===--- SIPreAllocateWWMRegs.h -------------------------------------------===//
+//===--- SIPreAllocateWWMRegs.h ---------------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.



More information about the llvm-branch-commits mailing list