[llvm] [CodeGen][NewPM] Port SIWholeQuadMode to NPM. (PR #125833)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 5 03:14:38 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Christudasan Devadasan (cdevadas)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/125833.diff
9 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+2-2)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+2-1)
- (modified) llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp (+58-26)
- (added) llvm/lib/Target/AMDGPU/SIWholeQuadMode.h (+27)
- (modified) llvm/test/CodeGen/AMDGPU/licm-wwm.mir (+1)
- (modified) llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir (+1)
- (modified) llvm/test/CodeGen/AMDGPU/wqm-terminators.mir (+1)
- (modified) llvm/test/CodeGen/AMDGPU/wqm.mir (+1)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 31656c98ccd36f..fa3496dd5c9c29 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -41,7 +41,7 @@ FunctionPass *createSIPeepholeSDWALegacyPass();
FunctionPass *createSILowerI1CopiesLegacyPass();
FunctionPass *createSIShrinkInstructionsLegacyPass();
FunctionPass *createSILoadStoreOptimizerLegacyPass();
-FunctionPass *createSIWholeQuadModePass();
+FunctionPass *createSIWholeQuadModeLegacyPass();
FunctionPass *createSIFixControlFlowLiveIntervalsPass();
FunctionPass *createSIOptimizeExecMaskingPreRAPass();
FunctionPass *createSIOptimizeVGPRLiveRangeLegacyPass();
@@ -204,7 +204,7 @@ extern char &SILowerSGPRSpillsLegacyID;
void initializeSILoadStoreOptimizerLegacyPass(PassRegistry &);
extern char &SILoadStoreOptimizerLegacyID;
-void initializeSIWholeQuadModePass(PassRegistry &);
+void initializeSIWholeQuadModeLegacyPass(PassRegistry &);
extern char &SIWholeQuadModeID;
void initializeSILowerControlFlowLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 45e2f0d9097adf..224515aeb26fb2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -111,6 +111,7 @@ MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
+MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
#undef MACHINE_FUNCTION_PASS
#define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
@@ -140,7 +141,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-global-isel-divergence-lowering", AMDGPUGlob
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbankselect", AMDGPURegBankSelectPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbanklegalize", AMDGPURegBankLegalizePass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbank-combiner", AMDGPURegBankCombinerPass())
-DUMMY_MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
#undef DUMMY_MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 4003fdba0555b8..1df03748332e5c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -50,6 +50,7 @@
#include "SIPeepholeSDWA.h"
#include "SIPreAllocateWWMRegs.h"
#include "SIShrinkInstructions.h"
+#include "SIWholeQuadMode.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
@@ -529,7 +530,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIInsertHardClausesPass(*PR);
initializeSIInsertWaitcntsPass(*PR);
initializeSIModeRegisterPass(*PR);
- initializeSIWholeQuadModePass(*PR);
+ initializeSIWholeQuadModeLegacyPass(*PR);
initializeSILowerControlFlowLegacyPass(*PR);
initializeSIPreEmitPeepholePass(*PR);
initializeSILateBranchLoweringPass(*PR);
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 87eb6d9e385d46..3293602db09017 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -67,6 +67,7 @@
///
//===----------------------------------------------------------------------===//
+#include "SIWholeQuadMode.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -148,11 +149,19 @@ struct WorkItem {
WorkItem(MachineInstr *MI) : MI(MI) {}
};
-class SIWholeQuadMode : public MachineFunctionPass {
+class SIWholeQuadMode {
+public:
+ SIWholeQuadMode(MachineFunction &MF, LiveIntervals *LIS,
+ MachineDominatorTree *MDT, MachinePostDominatorTree *PDT)
+ : ST(&MF.getSubtarget<GCNSubtarget>()), TII(ST->getInstrInfo()),
+ TRI(&TII->getRegisterInfo()), MRI(&MF.getRegInfo()), LIS(LIS), MDT(MDT),
+ PDT(PDT) {}
+ bool run(MachineFunction &MF);
+
private:
+ const GCNSubtarget *ST;
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
- const GCNSubtarget *ST;
MachineRegisterInfo *MRI;
LiveIntervals *LIS;
MachineDominatorTree *MDT;
@@ -225,12 +234,13 @@ class SIWholeQuadMode : public MachineFunctionPass {
void lowerInitExec(MachineInstr &MI);
MachineBasicBlock::iterator lowerInitExecInstrs(MachineBasicBlock &Entry,
bool &Changed);
+};
+class SIWholeQuadModeLegacy : public MachineFunctionPass {
public:
static char ID;
- SIWholeQuadMode() :
- MachineFunctionPass(ID) { }
+ SIWholeQuadModeLegacy() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -250,23 +260,22 @@ class SIWholeQuadMode : public MachineFunctionPass {
MachineFunctionProperties::Property::IsSSA);
}
};
-
} // end anonymous namespace
-char SIWholeQuadMode::ID = 0;
+char SIWholeQuadModeLegacy::ID = 0;
-INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
- false)
+INITIALIZE_PASS_BEGIN(SIWholeQuadModeLegacy, DEBUG_TYPE, "SI Whole Quad Mode",
+ false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
-INITIALIZE_PASS_END(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
- false)
+INITIALIZE_PASS_END(SIWholeQuadModeLegacy, DEBUG_TYPE, "SI Whole Quad Mode",
+ false, false)
-char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID;
+char &llvm::SIWholeQuadModeID = SIWholeQuadModeLegacy::ID;
-FunctionPass *llvm::createSIWholeQuadModePass() {
- return new SIWholeQuadMode;
+FunctionPass *llvm::createSIWholeQuadModeLegacyPass() {
+ return new SIWholeQuadModeLegacy;
}
#ifndef NDEBUG
@@ -1689,7 +1698,7 @@ SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry, bool &Changed) {
return InsertPt;
}
-bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
+bool SIWholeQuadMode::run(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "SI Whole Quad Mode on " << MF.getName()
<< " ------------- \n");
LLVM_DEBUG(MF.dump(););
@@ -1704,18 +1713,6 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
SetInactiveInstrs.clear();
StateTransition.clear();
- ST = &MF.getSubtarget<GCNSubtarget>();
-
- TII = ST->getInstrInfo();
- TRI = &TII->getRegisterInfo();
- MRI = &MF.getRegInfo();
- LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
- auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
- MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
- auto *PDTWrapper =
- getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
- PDT = PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr;
-
if (ST->isWave32()) {
AndOpc = AMDGPU::S_AND_B32;
AndTermOpc = AMDGPU::S_AND_B32_term;
@@ -1816,3 +1813,38 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
+
+bool SIWholeQuadModeLegacy::runOnMachineFunction(MachineFunction &MF) {
+ LiveIntervals *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
+ auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
+ MachineDominatorTree *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
+ auto *PDTWrapper =
+ getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
+ MachinePostDominatorTree *PDT =
+ PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr;
+ SIWholeQuadMode Impl(MF, LIS, MDT, PDT);
+ return Impl.run(MF);
+}
+
+PreservedAnalyses
+SIWholeQuadModePass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ MFPropsModifier _(*this, MF);
+
+ LiveIntervals *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(MF);
+ MachineDominatorTree *MDT =
+ MFAM.getCachedResult<MachineDominatorTreeAnalysis>(MF);
+ MachinePostDominatorTree *PDT =
+ MFAM.getCachedResult<MachinePostDominatorTreeAnalysis>(MF);
+ SIWholeQuadMode Impl(MF, LIS, MDT, PDT);
+ bool Changed = Impl.run(MF);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
+ PA.preserve<SlotIndexesAnalysis>();
+ PA.preserve<LiveIntervalsAnalysis>();
+ PA.preserve<MachineDominatorTreeAnalysis>();
+ PA.preserve<MachinePostDominatorTreeAnalysis>();
+ return PA;
+}
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.h b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.h
new file mode 100644
index 00000000000000..e30b46721841b4
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.h
@@ -0,0 +1,27 @@
+//===- SIWholeQuadMode.h ----------------------------------------*- C++- *-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
+#define LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class SIWholeQuadModePass : public PassInfoMixin<SIWholeQuadModePass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+
+ MachineFunctionProperties getClearedProperties() const {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
diff --git a/llvm/test/CodeGen/AMDGPU/licm-wwm.mir b/llvm/test/CodeGen/AMDGPU/licm-wwm.mir
index fc20674971a716..85525aa4dbb098 100644
--- a/llvm/test/CodeGen/AMDGPU/licm-wwm.mir
+++ b/llvm/test/CodeGen/AMDGPU/licm-wwm.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s
# Machine LICM may hoist an intruction from a WWM region, which will force SI-WQM pass
# to create a second WWM region. This is an unwanted hoisting.
diff --git a/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir b/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir
index a4a9c04bb0c6a5..c02301446861d5 100644
--- a/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=si-wqm -o - %s | FileCheck %s
---
# Test that we don't do silly things when there is no whole wave mode in the
diff --git a/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir b/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir
index 8d75bb3b1280f7..7656629a7b0098 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-wqm -o - %s | FileCheck %s
--- |
define amdgpu_ps void @exit_to_exact() {
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir
index 4762760c4ba24b..99327e1d3c4985 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -passes=si-wqm -o - %s | FileCheck %s
--- |
define amdgpu_ps void @test_strict_wwm_scc() {
``````````
</details>
https://github.com/llvm/llvm-project/pull/125833
More information about the llvm-commits
mailing list