[llvm] [CodeGen][NewPM] Port SIWholeQuadMode to NPM. (PR #125833)

via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 5 03:14:38 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Christudasan Devadasan (cdevadas)

<details>
<summary>Changes</summary>



---
Full diff: https://github.com/llvm/llvm-project/pull/125833.diff


9 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+2-2) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+2-1) 
- (modified) llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp (+58-26) 
- (added) llvm/lib/Target/AMDGPU/SIWholeQuadMode.h (+27) 
- (modified) llvm/test/CodeGen/AMDGPU/licm-wwm.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/wqm-terminators.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/wqm.mir (+1) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 31656c98ccd36f..fa3496dd5c9c29 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -41,7 +41,7 @@ FunctionPass *createSIPeepholeSDWALegacyPass();
 FunctionPass *createSILowerI1CopiesLegacyPass();
 FunctionPass *createSIShrinkInstructionsLegacyPass();
 FunctionPass *createSILoadStoreOptimizerLegacyPass();
-FunctionPass *createSIWholeQuadModePass();
+FunctionPass *createSIWholeQuadModeLegacyPass();
 FunctionPass *createSIFixControlFlowLiveIntervalsPass();
 FunctionPass *createSIOptimizeExecMaskingPreRAPass();
 FunctionPass *createSIOptimizeVGPRLiveRangeLegacyPass();
@@ -204,7 +204,7 @@ extern char &SILowerSGPRSpillsLegacyID;
 void initializeSILoadStoreOptimizerLegacyPass(PassRegistry &);
 extern char &SILoadStoreOptimizerLegacyID;
 
-void initializeSIWholeQuadModePass(PassRegistry &);
+void initializeSIWholeQuadModeLegacyPass(PassRegistry &);
 extern char &SIWholeQuadModeID;
 
 void initializeSILowerControlFlowLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 45e2f0d9097adf..224515aeb26fb2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -111,6 +111,7 @@ MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
 MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
 MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
 MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
+MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
 #undef MACHINE_FUNCTION_PASS
 
 #define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
@@ -140,7 +141,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-global-isel-divergence-lowering", AMDGPUGlob
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbankselect", AMDGPURegBankSelectPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbanklegalize", AMDGPURegBankLegalizePass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbank-combiner", AMDGPURegBankCombinerPass())
-DUMMY_MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
 
 #undef DUMMY_MACHINE_FUNCTION_PASS
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 4003fdba0555b8..1df03748332e5c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -50,6 +50,7 @@
 #include "SIPeepholeSDWA.h"
 #include "SIPreAllocateWWMRegs.h"
 #include "SIShrinkInstructions.h"
+#include "SIWholeQuadMode.h"
 #include "TargetInfo/AMDGPUTargetInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/Analysis/CGSCCPassManager.h"
@@ -529,7 +530,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeSIInsertHardClausesPass(*PR);
   initializeSIInsertWaitcntsPass(*PR);
   initializeSIModeRegisterPass(*PR);
-  initializeSIWholeQuadModePass(*PR);
+  initializeSIWholeQuadModeLegacyPass(*PR);
   initializeSILowerControlFlowLegacyPass(*PR);
   initializeSIPreEmitPeepholePass(*PR);
   initializeSILateBranchLoweringPass(*PR);
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 87eb6d9e385d46..3293602db09017 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -67,6 +67,7 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "SIWholeQuadMode.h"
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -148,11 +149,19 @@ struct WorkItem {
   WorkItem(MachineInstr *MI) : MI(MI) {}
 };
 
-class SIWholeQuadMode : public MachineFunctionPass {
+class SIWholeQuadMode {
+public:
+  SIWholeQuadMode(MachineFunction &MF, LiveIntervals *LIS,
+                  MachineDominatorTree *MDT, MachinePostDominatorTree *PDT)
+      : ST(&MF.getSubtarget<GCNSubtarget>()), TII(ST->getInstrInfo()),
+        TRI(&TII->getRegisterInfo()), MRI(&MF.getRegInfo()), LIS(LIS), MDT(MDT),
+        PDT(PDT) {}
+  bool run(MachineFunction &MF);
+
 private:
+  const GCNSubtarget *ST;
   const SIInstrInfo *TII;
   const SIRegisterInfo *TRI;
-  const GCNSubtarget *ST;
   MachineRegisterInfo *MRI;
   LiveIntervals *LIS;
   MachineDominatorTree *MDT;
@@ -225,12 +234,13 @@ class SIWholeQuadMode : public MachineFunctionPass {
   void lowerInitExec(MachineInstr &MI);
   MachineBasicBlock::iterator lowerInitExecInstrs(MachineBasicBlock &Entry,
                                                   bool &Changed);
+};
 
+class SIWholeQuadModeLegacy : public MachineFunctionPass {
 public:
   static char ID;
 
-  SIWholeQuadMode() :
-    MachineFunctionPass(ID) { }
+  SIWholeQuadModeLegacy() : MachineFunctionPass(ID) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
@@ -250,23 +260,22 @@ class SIWholeQuadMode : public MachineFunctionPass {
         MachineFunctionProperties::Property::IsSSA);
   }
 };
-
 } // end anonymous namespace
 
-char SIWholeQuadMode::ID = 0;
+char SIWholeQuadModeLegacy::ID = 0;
 
-INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
-                      false)
+INITIALIZE_PASS_BEGIN(SIWholeQuadModeLegacy, DEBUG_TYPE, "SI Whole Quad Mode",
+                      false, false)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
-INITIALIZE_PASS_END(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
-                    false)
+INITIALIZE_PASS_END(SIWholeQuadModeLegacy, DEBUG_TYPE, "SI Whole Quad Mode",
+                    false, false)
 
-char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID;
+char &llvm::SIWholeQuadModeID = SIWholeQuadModeLegacy::ID;
 
-FunctionPass *llvm::createSIWholeQuadModePass() {
-  return new SIWholeQuadMode;
+FunctionPass *llvm::createSIWholeQuadModeLegacyPass() {
+  return new SIWholeQuadModeLegacy;
 }
 
 #ifndef NDEBUG
@@ -1689,7 +1698,7 @@ SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry, bool &Changed) {
   return InsertPt;
 }
 
-bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
+bool SIWholeQuadMode::run(MachineFunction &MF) {
   LLVM_DEBUG(dbgs() << "SI Whole Quad Mode on " << MF.getName()
                     << " ------------- \n");
   LLVM_DEBUG(MF.dump(););
@@ -1704,18 +1713,6 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
   SetInactiveInstrs.clear();
   StateTransition.clear();
 
-  ST = &MF.getSubtarget<GCNSubtarget>();
-
-  TII = ST->getInstrInfo();
-  TRI = &TII->getRegisterInfo();
-  MRI = &MF.getRegInfo();
-  LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
-  auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
-  MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
-  auto *PDTWrapper =
-      getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
-  PDT = PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr;
-
   if (ST->isWave32()) {
     AndOpc = AMDGPU::S_AND_B32;
     AndTermOpc = AMDGPU::S_AND_B32_term;
@@ -1816,3 +1813,38 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
 
   return Changed;
 }
+
+bool SIWholeQuadModeLegacy::runOnMachineFunction(MachineFunction &MF) {
+  LiveIntervals *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
+  auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
+  MachineDominatorTree *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
+  auto *PDTWrapper =
+      getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
+  MachinePostDominatorTree *PDT =
+      PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr;
+  SIWholeQuadMode Impl(MF, LIS, MDT, PDT);
+  return Impl.run(MF);
+}
+
+PreservedAnalyses
+SIWholeQuadModePass::run(MachineFunction &MF,
+                         MachineFunctionAnalysisManager &MFAM) {
+  MFPropsModifier _(*this, MF);
+
+  LiveIntervals *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(MF);
+  MachineDominatorTree *MDT =
+      MFAM.getCachedResult<MachineDominatorTreeAnalysis>(MF);
+  MachinePostDominatorTree *PDT =
+      MFAM.getCachedResult<MachinePostDominatorTreeAnalysis>(MF);
+  SIWholeQuadMode Impl(MF, LIS, MDT, PDT);
+  bool Changed = Impl.run(MF);
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserve<SlotIndexesAnalysis>();
+  PA.preserve<LiveIntervalsAnalysis>();
+  PA.preserve<MachineDominatorTreeAnalysis>();
+  PA.preserve<MachinePostDominatorTreeAnalysis>();
+  return PA;
+}
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.h b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.h
new file mode 100644
index 00000000000000..e30b46721841b4
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.h
@@ -0,0 +1,27 @@
+//===- SIWholeQuadMode.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
+#define LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class SIWholeQuadModePass : public PassInfoMixin<SIWholeQuadModePass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+
+  MachineFunctionProperties getClearedProperties() const {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::IsSSA);
+  }
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
diff --git a/llvm/test/CodeGen/AMDGPU/licm-wwm.mir b/llvm/test/CodeGen/AMDGPU/licm-wwm.mir
index fc20674971a716..85525aa4dbb098 100644
--- a/llvm/test/CodeGen/AMDGPU/licm-wwm.mir
+++ b/llvm/test/CodeGen/AMDGPU/licm-wwm.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s
 
 # Machine LICM may hoist an intruction from a WWM region, which will force SI-WQM pass
 # to create a second WWM region. This is an unwanted hoisting.
diff --git a/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir b/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir
index a4a9c04bb0c6a5..c02301446861d5 100644
--- a/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-wqm -o -  %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=si-wqm -o -  %s | FileCheck %s
 
 ---
 # Test that we don't do silly things when there is no whole wave mode in the
diff --git a/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir b/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir
index 8d75bb3b1280f7..7656629a7b0098 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-wqm -o -  %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-wqm -o -  %s | FileCheck %s
 
 --- |
   define amdgpu_ps void @exit_to_exact() {
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir
index 4762760c4ba24b..99327e1d3c4985 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass si-wqm -o -  %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -passes=si-wqm -o -  %s | FileCheck %s
 
 --- |
   define amdgpu_ps void @test_strict_wwm_scc() {

``````````

</details>


https://github.com/llvm/llvm-project/pull/125833


More information about the llvm-commits mailing list