[llvm-branch-commits] [llvm] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM (PR #109939)
Akshat Oke via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Sep 25 04:25:44 PDT 2024
https://github.com/Akshat-Oke updated https://github.com/llvm/llvm-project/pull/109939
From 3d8720930eaf0acd31c39722c98da085066ed315 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Wed, 25 Sep 2024 11:21:04 +0000
Subject: [PATCH 1/2] [AMDGPU] Add tests for SIPreAllocateWWMRegs
---
.../AMDGPU/si-pre-allocate-wwm-regs.mir | 26 +++++++++++++++++++
.../si-pre-allocate-wwm-sgpr-spills.mir | 21 +++++++++++++++
2 files changed, 47 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir
diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
new file mode 100644
index 00000000000000..f2db299f575f5e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
@@ -0,0 +1,26 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s
+
+---
+
+name: pre_allocate_wwm_regs_strict
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr1
+ ; CHECK-LABEL: name: pre_allocate_wwm_regs_strict
+ ; CHECK: liveins: $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: dead $vgpr0 = V_MOV_B32_dpp $vgpr0, [[DEF]], 323, 12, 15, 0, implicit $exec
+ ; CHECK-NEXT: $exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5
+ ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
+ %0:vgpr_32 = IMPLICIT_DEF
+ renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
+ %24:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %25:vgpr_32 = V_MOV_B32_dpp %24:vgpr_32(tied-def 0), %0:vgpr_32, 323, 12, 15, 0, implicit $exec
+ $exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5
+ %2:vgpr_32 = COPY %0:vgpr_32
+...
diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir
new file mode 100644
index 00000000000000..f0efe74878d831
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir
@@ -0,0 +1,21 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -amdgpu-prealloc-sgpr-spill-vgprs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s
+
+---
+
+name: pre_allocate_wwm_spill_to_vgpr
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr1
+ ; CHECK-LABEL: name: pre_allocate_wwm_spill_to_vgpr
+ ; CHECK: liveins: $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, [[DEF]]
+ ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
+ %0:vgpr_32 = IMPLICIT_DEF
+ %23:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, %0:vgpr_32
+ %2:vgpr_32 = COPY %0:vgpr_32
+...
+
From 0d0cd3fb0bdc41731c89492dbe34a1ebf939c52e Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Tue, 24 Sep 2024 11:41:18 +0000
Subject: [PATCH 2/2] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM
---
llvm/lib/Target/AMDGPU/AMDGPU.h | 6 +-
llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 +
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 7 ++-
.../Target/AMDGPU/SIPreAllocateWWMRegs.cpp | 60 ++++++++++++-------
llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h | 30 ++++++++++
.../AMDGPU/si-pre-allocate-wwm-regs.mir | 1 +
.../si-pre-allocate-wwm-sgpr-spills.mir | 1 +
7 files changed, 79 insertions(+), 27 deletions(-)
create mode 100644 llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index b2dd354e496a2e..c0fd5e4625895a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -49,7 +49,7 @@ FunctionPass *createSIFixSGPRCopiesLegacyPass();
FunctionPass *createLowerWWMCopiesPass();
FunctionPass *createSIMemoryLegalizerPass();
FunctionPass *createSIInsertWaitcntsPass();
-FunctionPass *createSIPreAllocateWWMRegsPass();
+FunctionPass *createSIPreAllocateWWMRegsLegacyPass();
FunctionPass *createSIFormMemoryClausesPass();
FunctionPass *createSIPostRABundlerPass();
@@ -208,8 +208,8 @@ extern char &SILateBranchLoweringPassID;
void initializeSIOptimizeExecMaskingPass(PassRegistry &);
extern char &SIOptimizeExecMaskingID;
-void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
-extern char &SIPreAllocateWWMRegsID;
+void initializeSIPreAllocateWWMRegsLegacyPass(PassRegistry &);
+extern char &SIPreAllocateWWMRegsLegacyID;
void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &);
extern char &AMDGPUImageIntrinsicOptimizerID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 0ebf34c901c142..174a90f0aa419d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -102,5 +102,6 @@ MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
+MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
#undef MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 04fdee0819b502..9a28c648e2c4ed 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -41,6 +41,7 @@
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "SIPeepholeSDWA.h"
+#include "SIPreAllocateWWMRegs.h"
#include "SIShrinkInstructions.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
@@ -461,7 +462,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSILateBranchLoweringPass(*PR);
initializeSIMemoryLegalizerPass(*PR);
initializeSIOptimizeExecMaskingPass(*PR);
- initializeSIPreAllocateWWMRegsPass(*PR);
+ initializeSIPreAllocateWWMRegsLegacyPass(*PR);
initializeSIFormMemoryClausesPass(*PR);
initializeSIPostRABundlerPass(*PR);
initializeGCNCreateVOPDPass(*PR);
@@ -1443,7 +1444,7 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
// Equivalent of PEI for SGPRs.
addPass(&SILowerSGPRSpillsLegacyID);
- addPass(&SIPreAllocateWWMRegsID);
+ addPass(&SIPreAllocateWWMRegsLegacyID);
addPass(createVGPRAllocPass(false));
@@ -1467,7 +1468,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
// Equivalent of PEI for SGPRs.
addPass(&SILowerSGPRSpillsLegacyID);
- addPass(&SIPreAllocateWWMRegsID);
+ addPass(&SIPreAllocateWWMRegsLegacyID);
addPass(createVGPRAllocPass(true));
diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
index 0635cab7b872e2..c1d7a464a81537 100644
--- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "SIPreAllocateWWMRegs.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -34,7 +35,7 @@ static cl::opt<bool>
namespace {
-class SIPreAllocateWWMRegs : public MachineFunctionPass {
+class SIPreAllocateWWMRegs {
private:
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
@@ -48,13 +49,21 @@ class SIPreAllocateWWMRegs : public MachineFunctionPass {
#ifndef NDEBUG
void printWWMInfo(const MachineInstr &MI);
#endif
+ bool processDef(MachineOperand &MO);
+ void rewriteRegs(MachineFunction &MF);
+
+public:
+ SIPreAllocateWWMRegs(LiveIntervals *LIS, LiveRegMatrix *Matrix,
+ VirtRegMap *VRM)
+ : LIS(LIS), Matrix(Matrix), VRM(VRM) {}
+ bool run(MachineFunction &MF);
+};
+class SIPreAllocateWWMRegsLegacy : public MachineFunctionPass {
public:
static char ID;
- SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
- initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
- }
+ SIPreAllocateWWMRegsLegacy() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -65,28 +74,24 @@ class SIPreAllocateWWMRegs : public MachineFunctionPass {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
-
-private:
- bool processDef(MachineOperand &MO);
- void rewriteRegs(MachineFunction &MF);
};
} // End anonymous namespace.
-INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
- "SI Pre-allocate WWM Registers", false, false)
+INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE,
+ "SI Pre-allocate WWM Registers", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperPass)
-INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
- "SI Pre-allocate WWM Registers", false, false)
+INITIALIZE_PASS_END(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE,
+ "SI Pre-allocate WWM Registers", false, false)
-char SIPreAllocateWWMRegs::ID = 0;
+char SIPreAllocateWWMRegsLegacy::ID = 0;
-char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
+char &llvm::SIPreAllocateWWMRegsLegacyID = SIPreAllocateWWMRegsLegacy::ID;
-FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
- return new SIPreAllocateWWMRegs();
+FunctionPass *llvm::createSIPreAllocateWWMRegsLegacyPass() {
+ return new SIPreAllocateWWMRegsLegacy();
}
bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
@@ -184,7 +189,14 @@ SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) {
#endif
-bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
+// Legacy pass manager entry point: collects the analyses owned by the wrapper
+// passes and delegates the actual work to the shared implementation class.
+bool SIPreAllocateWWMRegsLegacy::runOnMachineFunction(MachineFunction &MF) {
+  auto &Intervals = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
+  auto &RegMatrix = getAnalysis<LiveRegMatrixWrapperPass>().getLRM();
+  auto &RegMap = getAnalysis<VirtRegMapWrapperPass>().getVRM();
+  SIPreAllocateWWMRegs Impl(&Intervals, &RegMatrix, &RegMap);
+  return Impl.run(MF);
+}
+
+bool SIPreAllocateWWMRegs::run(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -193,10 +205,6 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
- LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
- Matrix = &getAnalysis<LiveRegMatrixWrapperPass>().getLRM();
- VRM = &getAnalysis<VirtRegMapWrapperPass>().getVRM();
-
RegClassInfo.runOnMachineFunction(MF);
bool PreallocateSGPRSpillVGPRs =
@@ -254,3 +262,13 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
rewriteRegs(MF);
return true;
}
+
+/// New pass manager entry point: obtains the LiveIntervals, LiveRegMatrix and
+/// VirtRegMap results for \p MF from \p MFAM and forwards them to the shared
+/// implementation class.
+PreservedAnalyses
+SIPreAllocateWWMRegsPass::run(MachineFunction &MF,
+                              MachineFunctionAnalysisManager &MFAM) {
+  auto &Intervals = MFAM.getResult<LiveIntervalsAnalysis>(MF);
+  auto &RegMatrix = MFAM.getResult<LiveRegMatrixAnalysis>(MF);
+  auto &RegMap = MFAM.getResult<VirtRegMapAnalysis>(MF);
+  // The boolean returned by the implementation is not consulted; all analyses
+  // are reported as preserved regardless of it.
+  SIPreAllocateWWMRegs(&Intervals, &RegMatrix, &RegMap).run(MF);
+  return PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h
new file mode 100644
index 00000000000000..b86f7fe9213af0
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h
@@ -0,0 +1,30 @@
+//===--- SIPreAllocateWWMRegs.h -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H
+#define LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class SIPreAllocateWWMRegsPass
+    : public PassInfoMixin<SIPreAllocateWWMRegsPass> {
+public:
+  /// Runs WWM register pre-allocation on \p MF using the LiveIntervals,
+  /// LiveRegMatrix and VirtRegMap results obtained from \p MFAM.
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+  // No getRequiredProperties() on purpose: this pass is scheduled between SGPR
+  // spill lowering and VGPR allocation, where machine functions are no longer
+  // in SSA form, so demanding IsSSA would reject real pipeline input.
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H
diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
index f2db299f575f5e..27df8d0401e2e8 100644
--- a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -passes=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s
---
diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir
index f0efe74878d831..d5508906519879 100644
--- a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -amdgpu-prealloc-sgpr-spill-vgprs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -amdgpu-prealloc-sgpr-spill-vgprs -passes=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s
---
More information about the llvm-branch-commits
mailing list