[llvm] [AMDGPU][NewPM] Port "GCNRewritePartialRegUses" pass to NPM (PR #126024)

Vikram Hegde via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 10 22:01:09 PST 2025


https://github.com/vikramRH updated https://github.com/llvm/llvm-project/pull/126024

From 930dfa431b245e3bd9af85249d2ad94f89914baf Mon Sep 17 00:00:00 2001
From: vikhegde <vikram.hegde at amd.com>
Date: Wed, 5 Feb 2025 15:00:33 +0530
Subject: [PATCH 1/2] [AMDGPU][NewPM] Port "GCNRewritePartialRegUses" pass to
 NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h               |   2 +-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |   2 +-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   3 +-
 .../AMDGPU/GCNRewritePartialRegUses.cpp       | 108 +++++++++++-------
 .../Target/AMDGPU/GCNRewritePartialRegUses.h  |  23 ++++
 .../AMDGPU/rewrite-partial-reg-uses-dbg.mir   |   1 +
 .../AMDGPU/rewrite-partial-reg-uses-gen.mir   |   1 +
 .../AMDGPU/rewrite-partial-reg-uses.mir       |   1 +
 8 files changed, 96 insertions(+), 45 deletions(-)
 create mode 100644 llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.h

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 2c6b8828d5cfbab..048c39464be5418 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -460,7 +460,7 @@ extern char &GCNPreRAOptimizationsID;
 FunctionPass *createAMDGPUSetWavePriorityPass();
 void initializeAMDGPUSetWavePriorityPass(PassRegistry &);
 
-void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &);
+void initializeGCNRewritePartialRegUsesLegacyPass(llvm::PassRegistry &);
 extern char &GCNRewritePartialRegUsesID;
 
 void initializeAMDGPUWaitSGPRHazardsLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 41ad1445f47e927..62716a9d725d90d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -98,6 +98,7 @@ FUNCTION_PASS_WITH_PARAMS(
 #endif
 MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
 MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", GCNPreRALongBranchRegPass())
+MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
 MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
 MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", SIFixVGPRCopiesPass())
@@ -119,7 +120,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-insert-delay-alu", AMDGPUInsertDelayAluPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
-DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())
 
 DUMMY_MACHINE_FUNCTION_PASS("si-form-memory-clauses", SIFormMemoryClausesPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index fffd30b26dc1d50..dedfcfcb4d5b4f2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -33,6 +33,7 @@
 #include "GCNDPPCombine.h"
 #include "GCNIterativeScheduler.h"
 #include "GCNPreRALongBranchReg.h"
+#include "GCNRewritePartialRegUses.h"
 #include "GCNSchedStrategy.h"
 #include "GCNVOPDUtils.h"
 #include "R600.h"
@@ -550,7 +551,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeGCNNSAReassignPass(*PR);
   initializeGCNPreRAOptimizationsPass(*PR);
   initializeGCNPreRALongBranchRegLegacyPass(*PR);
-  initializeGCNRewritePartialRegUsesPass(*PR);
+  initializeGCNRewritePartialRegUsesLegacyPass(*PR);
   initializeGCNRegPressurePrinterPass(*PR);
   initializeAMDGPUPreloadKernArgPrologLegacyPass(*PR);
   initializeAMDGPUWaitSGPRHazardsLegacyPass(*PR);
diff --git a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
index 077ccf36ea4fb28..cccb71d5f8e7287 100644
--- a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
@@ -28,6 +28,7 @@
 /// calculation and creates more possibilities for the code unaware of lanemasks
 //===----------------------------------------------------------------------===//
 
+#include "GCNRewritePartialRegUses.h"
 #include "AMDGPU.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIRegisterInfo.h"
@@ -44,25 +45,7 @@ using namespace llvm;
 
 namespace {
 
-class GCNRewritePartialRegUses : public MachineFunctionPass {
-public:
-  static char ID;
-  GCNRewritePartialRegUses() : MachineFunctionPass(ID) {}
-
-  StringRef getPassName() const override {
-    return "Rewrite Partial Register Uses";
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    AU.addPreserved<LiveIntervalsWrapperPass>();
-    AU.addPreserved<SlotIndexesWrapperPass>();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
-
-private:
+class GCNRewritePartialRegUsesImpl {
   MachineRegisterInfo *MRI;
   const SIRegisterInfo *TRI;
   const TargetInstrInfo *TII;
@@ -155,13 +138,36 @@ class GCNRewritePartialRegUses : public MachineFunctionPass {
   /// Cache for getAllocatableAndAlignedRegClassMask method:
   ///   AlignNumBits -> Class bitmask.
   mutable SmallDenseMap<unsigned, BitVector> AllocatableAndAlignedRegClassMasks;
+
+public:
+  GCNRewritePartialRegUsesImpl(LiveIntervals *LS) : LIS(LS) {}
+  bool run(MachineFunction &MF);
+};
+
+class GCNRewritePartialRegUsesLegacy : public MachineFunctionPass {
+public:
+  static char ID;
+  GCNRewritePartialRegUsesLegacy() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override {
+    return "Rewrite Partial Register Uses";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addPreserved<LiveIntervalsWrapperPass>();
+    AU.addPreserved<SlotIndexesWrapperPass>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
 };
 
 } // end anonymous namespace
 
 // TODO: move this to the tablegen and use binary search by Offset.
-unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
-                                             unsigned Size) const {
+unsigned GCNRewritePartialRegUsesImpl::getSubReg(unsigned Offset,
+                                                 unsigned Size) const {
   const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0);
   if (Inserted) {
     for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
@@ -175,15 +181,14 @@ unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
   return I->second;
 }
 
-unsigned GCNRewritePartialRegUses::shiftSubReg(unsigned SubReg,
-                                               unsigned RShift) const {
+unsigned GCNRewritePartialRegUsesImpl::shiftSubReg(unsigned SubReg,
+                                                   unsigned RShift) const {
   unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift;
   return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg));
 }
 
-const uint32_t *
-GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
-                                               unsigned SubRegIdx) const {
+const uint32_t *GCNRewritePartialRegUsesImpl::getSuperRegClassMask(
+    const TargetRegisterClass *RC, unsigned SubRegIdx) const {
   const auto [I, Inserted] =
       SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr);
   if (Inserted) {
@@ -197,7 +202,8 @@ GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
   return I->second;
 }
 
-const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
+const BitVector &
+GCNRewritePartialRegUsesImpl::getAllocatableAndAlignedRegClassMask(
     unsigned AlignNumBits) const {
   const auto [I, Inserted] =
       AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits);
@@ -214,7 +220,7 @@ const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
 }
 
 const TargetRegisterClass *
-GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
+GCNRewritePartialRegUsesImpl::getRegClassWithShiftedSubregs(
     const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits,
     unsigned CoverSubregIdx, SubRegMap &SubRegs) const {
 
@@ -289,8 +295,8 @@ GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
 }
 
 const TargetRegisterClass *
-GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
-                                        SubRegMap &SubRegs) const {
+GCNRewritePartialRegUsesImpl::getMinSizeReg(const TargetRegisterClass *RC,
+                                            SubRegMap &SubRegs) const {
   unsigned CoverSubreg = AMDGPU::NoSubRegister;
   unsigned Offset = std::numeric_limits<unsigned>::max();
   unsigned End = 0;
@@ -343,9 +349,8 @@ GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
 
 // Only the subrange's lanemasks of the original interval need to be modified.
 // Subrange for a covering subreg becomes the main range.
-void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
-                                                   Register NewReg,
-                                                   SubRegMap &SubRegs) const {
+void GCNRewritePartialRegUsesImpl::updateLiveIntervals(
+    Register OldReg, Register NewReg, SubRegMap &SubRegs) const {
   if (!LIS->hasInterval(OldReg))
     return;
 
@@ -400,13 +405,13 @@ void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
 }
 
 const TargetRegisterClass *
-GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const {
+GCNRewritePartialRegUsesImpl::getOperandRegClass(MachineOperand &MO) const {
   MachineInstr *MI = MO.getParent();
   return TII->getRegClass(TII->get(MI->getOpcode()), MI->getOperandNo(&MO), TRI,
                           *MI->getParent()->getParent());
 }
 
-bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
+bool GCNRewritePartialRegUsesImpl::rewriteReg(Register Reg) const {
   auto Range = MRI->reg_nodbg_operands(Reg);
   if (Range.empty() || any_of(Range, [](MachineOperand &MO) {
         return MO.getSubReg() == AMDGPU::NoSubRegister; // Whole reg used. [1]
@@ -476,12 +481,10 @@ bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
   return true;
 }
 
-bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) {
+bool GCNRewritePartialRegUsesImpl::run(MachineFunction &MF) {
   MRI = &MF.getRegInfo();
   TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
   TII = MF.getSubtarget().getInstrInfo();
-  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
-  LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
   bool Changed = false;
   for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
     Changed |= rewriteReg(Register::index2VirtReg(I));
@@ -489,11 +492,32 @@ bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) {
   return Changed;
 }
 
-char GCNRewritePartialRegUses::ID;
+bool GCNRewritePartialRegUsesLegacy::runOnMachineFunction(MachineFunction &MF) {
+  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
+  auto LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
+  GCNRewritePartialRegUsesImpl Impl(LIS);
+  return Impl.run(MF);
+}
+
+PreservedAnalyses
+GCNRewritePartialRegUsesPass::run(MachineFunction &MF,
+                                  MachineFunctionAnalysisManager &MFAM) {
+  auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
+  if (!GCNRewritePartialRegUsesImpl(LIS).run(MF))
+    return PreservedAnalyses::all();
+
+  auto PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserveSet<CFGAnalyses>();
+  PA.preserve<LiveIntervalsAnalysis>();
+  PA.preserve<SlotIndexesAnalysis>();
+  return PA;
+}
+
+char GCNRewritePartialRegUsesLegacy::ID;
 
-char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUses::ID;
+char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUsesLegacy::ID;
 
-INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUses, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE,
                       "Rewrite Partial Register Uses", false, false)
-INITIALIZE_PASS_END(GCNRewritePartialRegUses, DEBUG_TYPE,
+INITIALIZE_PASS_END(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE,
                     "Rewrite Partial Register Uses", false, false)
diff --git a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.h b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.h
new file mode 100644
index 000000000000000..b2c3190b5c6ba0f
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.h
@@ -0,0 +1,23 @@
+//===- GCNRewritePartialRegUses.h -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H
+#define LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class GCNRewritePartialRegUsesPass
+    : public PassInfoMixin<GCNRewritePartialRegUsesPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir
index 85d0c054754d03d..ede043ce73a47fb 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s
 --- |
   define void @test_vreg_96_w64() !dbg !5 {
   entry:
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
index 037f39df8c3e06e..79e9ce27376950a 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s
 ---
 name: test_subregs_composition_vreg_1024
 tracksRegLiveness: true
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
index 07e49dcdafd8cc3..33007ee8a7c38d6 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s
 ---
 name: test_subregs_composition_vreg_1024
 tracksRegLiveness: true

From 5b5710323ab2da09d972fe06e2d892c2b9284b39 Mon Sep 17 00:00:00 2001
From: vikhegde <vikram.hegde at amd.com>
Date: Thu, 6 Feb 2025 14:26:29 +0530
Subject: [PATCH 2/2] avoid auto

---
 llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
index cccb71d5f8e7287..c58d1b00a1002cc 100644
--- a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
@@ -493,8 +493,9 @@ bool GCNRewritePartialRegUsesImpl::run(MachineFunction &MF) {
 }
 
 bool GCNRewritePartialRegUsesLegacy::runOnMachineFunction(MachineFunction &MF) {
-  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
-  auto LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
+  LiveIntervalsWrapperPass *LISWrapper =
+      getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
+  LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
   GCNRewritePartialRegUsesImpl Impl(LIS);
   return Impl.run(MF);
 }



More information about the llvm-commits mailing list