[llvm-branch-commits] [llvm] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM (PR #109939)
Akshat Oke via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Oct 7 03:28:47 PDT 2024
https://github.com/Akshat-Oke updated https://github.com/llvm/llvm-project/pull/109939
>From 786fb970b7b1d12a6c6c6888d2b5cfe51363287d Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Tue, 24 Sep 2024 11:41:18 +0000
Subject: [PATCH 1/2] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM
---
llvm/lib/Target/AMDGPU/AMDGPU.h | 6 +-
llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 +
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 7 ++-
.../Target/AMDGPU/SIPreAllocateWWMRegs.cpp | 60 ++++++++++++-------
llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h | 25 ++++++++
.../AMDGPU/si-pre-allocate-wwm-regs.mir | 20 +++++++
6 files changed, 92 insertions(+), 27 deletions(-)
create mode 100644 llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 342d55e828bca5..95d0ad0f9dc96a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -49,7 +49,7 @@ FunctionPass *createSIFixSGPRCopiesLegacyPass();
FunctionPass *createLowerWWMCopiesPass();
FunctionPass *createSIMemoryLegalizerPass();
FunctionPass *createSIInsertWaitcntsPass();
-FunctionPass *createSIPreAllocateWWMRegsPass();
+FunctionPass *createSIPreAllocateWWMRegsLegacyPass();
FunctionPass *createSIFormMemoryClausesPass();
FunctionPass *createSIPostRABundlerPass();
@@ -212,8 +212,8 @@ extern char &SILateBranchLoweringPassID;
void initializeSIOptimizeExecMaskingPass(PassRegistry &);
extern char &SIOptimizeExecMaskingID;
-void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
-extern char &SIPreAllocateWWMRegsID;
+void initializeSIPreAllocateWWMRegsLegacyPass(PassRegistry &);
+extern char &SIPreAllocateWWMRegsLegacyID;
void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &);
extern char &AMDGPUImageIntrinsicOptimizerID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 0ebf34c901c142..174a90f0aa419d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -102,5 +102,6 @@ MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
+MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
#undef MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 1f2148c2922de9..dc5330740f4a6b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -41,6 +41,7 @@
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "SIPeepholeSDWA.h"
+#include "SIPreAllocateWWMRegs.h"
#include "SIShrinkInstructions.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
@@ -506,7 +507,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSILateBranchLoweringPass(*PR);
initializeSIMemoryLegalizerPass(*PR);
initializeSIOptimizeExecMaskingPass(*PR);
- initializeSIPreAllocateWWMRegsPass(*PR);
+ initializeSIPreAllocateWWMRegsLegacyPass(*PR);
initializeSIFormMemoryClausesPass(*PR);
initializeSIPostRABundlerPass(*PR);
initializeGCNCreateVOPDPass(*PR);
@@ -1505,7 +1506,7 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
addPass(&SILowerSGPRSpillsLegacyID);
// To Allocate wwm registers used in whole quad mode operations (for shaders).
- addPass(&SIPreAllocateWWMRegsID);
+ addPass(&SIPreAllocateWWMRegsLegacyID);
// For allocating other wwm register operands.
addPass(createWWMRegAllocPass(false));
@@ -1537,7 +1538,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
addPass(&SILowerSGPRSpillsLegacyID);
// To Allocate wwm registers used in whole quad mode operations (for shaders).
- addPass(&SIPreAllocateWWMRegsID);
+ addPass(&SIPreAllocateWWMRegsLegacyID);
// For allocating other whole wave mode registers.
addPass(createWWMRegAllocPass(true));
diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
index 07303e2aa726c5..f9109c01c8085b 100644
--- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "SIPreAllocateWWMRegs.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -34,7 +35,7 @@ static cl::opt<bool>
namespace {
-class SIPreAllocateWWMRegs : public MachineFunctionPass {
+class SIPreAllocateWWMRegs {
private:
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
@@ -48,13 +49,21 @@ class SIPreAllocateWWMRegs : public MachineFunctionPass {
#ifndef NDEBUG
void printWWMInfo(const MachineInstr &MI);
#endif
+ bool processDef(MachineOperand &MO);
+ void rewriteRegs(MachineFunction &MF);
+
+public:
+ SIPreAllocateWWMRegs(LiveIntervals *LIS, LiveRegMatrix *Matrix,
+ VirtRegMap *VRM)
+ : LIS(LIS), Matrix(Matrix), VRM(VRM) {}
+ bool run(MachineFunction &MF);
+};
+class SIPreAllocateWWMRegsLegacy : public MachineFunctionPass {
public:
static char ID;
- SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
- initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
- }
+ SIPreAllocateWWMRegsLegacy() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -65,28 +74,24 @@ class SIPreAllocateWWMRegs : public MachineFunctionPass {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
-
-private:
- bool processDef(MachineOperand &MO);
- void rewriteRegs(MachineFunction &MF);
};
} // End anonymous namespace.
-INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
- "SI Pre-allocate WWM Registers", false, false)
+INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE,
+ "SI Pre-allocate WWM Registers", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
-INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
- "SI Pre-allocate WWM Registers", false, false)
+INITIALIZE_PASS_END(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE,
+ "SI Pre-allocate WWM Registers", false, false)
-char SIPreAllocateWWMRegs::ID = 0;
+char SIPreAllocateWWMRegsLegacy::ID = 0;
-char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
+char &llvm::SIPreAllocateWWMRegsLegacyID = SIPreAllocateWWMRegsLegacy::ID;
-FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
- return new SIPreAllocateWWMRegs();
+FunctionPass *llvm::createSIPreAllocateWWMRegsLegacyPass() {
+ return new SIPreAllocateWWMRegsLegacy();
}
bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
@@ -184,7 +189,14 @@ SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) {
#endif
-bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
+bool SIPreAllocateWWMRegsLegacy::runOnMachineFunction(MachineFunction &MF) {
+ auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
+ auto *Matrix = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
+ auto *VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
+ return SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF);
+}
+
+bool SIPreAllocateWWMRegs::run(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -193,10 +205,6 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
- LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
- Matrix = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
- VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
-
RegClassInfo.runOnMachineFunction(MF);
bool PreallocateSGPRSpillVGPRs =
@@ -254,3 +262,13 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
rewriteRegs(MF);
return true;
}
+
+PreservedAnalyses
+SIPreAllocateWWMRegsPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ auto *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(MF);
+ auto *Matrix = &MFAM.getResult<LiveRegMatrixAnalysis>(MF);
+ auto *VRM = &MFAM.getResult<VirtRegMapAnalysis>(MF);
+ SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF);
+ return PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h
new file mode 100644
index 00000000000000..a0acde3afa77ce
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h
@@ -0,0 +1,25 @@
+//===--- SIPreAllocateWWMRegs.h -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H
+#define LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class SIPreAllocateWWMRegsPass
+ : public PassInfoMixin<SIPreAllocateWWMRegsPass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H
diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
index 4dcad87a985c0b..a8b97864300aba 100644
--- a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
@@ -2,6 +2,9 @@
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -amdgpu-prealloc-sgpr-spill-vgprs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s --check-prefix=CHECK2
+# RUN: llc -mtriple=amdgcn -passes=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -amdgpu-prealloc-sgpr-spill-vgprs -passes=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s --check-prefix=CHECK2
+
---
name: pre_allocate_wwm_regs_strict
@@ -18,6 +21,16 @@ body: |
; CHECK-NEXT: dead $vgpr0 = V_MOV_B32_dpp $vgpr0, [[DEF]], 323, 12, 15, 0, implicit $exec
; CHECK-NEXT: $exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5
; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
+ ;
+ ; CHECK2-LABEL: name: pre_allocate_wwm_regs_strict
+ ; CHECK2: liveins: $sgpr1
+ ; CHECK2-NEXT: {{ $}}
+ ; CHECK2-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK2-NEXT: renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK2-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK2-NEXT: dead $vgpr0 = V_MOV_B32_dpp $vgpr0, [[DEF]], 323, 12, 15, 0, implicit $exec
+ ; CHECK2-NEXT: $exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5
+ ; CHECK2-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
%0:vgpr_32 = IMPLICIT_DEF
renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
@@ -32,6 +45,13 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr1
+ ; CHECK-LABEL: name: pre_allocate_wwm_spill_to_vgpr
+ ; CHECK: liveins: $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[SI_SPILL_S32_TO_VGPR:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, [[DEF]]
+ ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
+ ;
; CHECK2-LABEL: name: pre_allocate_wwm_spill_to_vgpr
; CHECK2: liveins: $sgpr1
; CHECK2-NEXT: {{ $}}
>From 02fafddfb120352a2c6fc5bbc2820fab1693a74c Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Mon, 7 Oct 2024 09:23:15 +0000
Subject: [PATCH 2/2] C++ mode
---
llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h
index a0acde3afa77ce..99648176491687 100644
--- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h
+++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h
@@ -1,4 +1,4 @@
-//===--- SIPreAllocateWWMRegs.h -------------------------------------------===//
+//===--- SIPreAllocateWWMRegs.h ---------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
More information about the llvm-branch-commits mailing list