[llvm] [AMDGPU] Post-RA AGPR copy elimination pass (PR #153901)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 15 16:40:26 PDT 2025
================
@@ -0,0 +1,247 @@
+//===-- AMDGPUEliminateAGPRToVGPRCopy.cpp ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file \brief TODO
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-eliminate-agpr-to-vgpr-copy"
+
+STATISTIC(NumEliminated, "Number of copies eliminated");
+
+namespace {
+
+/// Per-function implementation object for the AGPR-to-VGPR copy elimination.
+///
+/// Caches the subtarget and the instruction/register info derived from it, and
+/// keeps references to the analyses handed in by the caller. Every member is a
+/// reference, so an instance must not outlive the MachineFunction or the
+/// analyses it was constructed from.
+class AMDGPUEliminateAGPRToVGPRCopyImpl {
+  const GCNSubtarget &ST;
+  const SIInstrInfo &TII;
+  const SIRegisterInfo &TRI;
+  MachineRegisterInfo &MRI;
+  VirtRegMap &VRM;
+  LiveRegMatrix &LRM;
+  LiveIntervals &LIS;
+
+public:
+  /// Bind the implementation to \p MF and the supplied analyses.
+  ///
+  /// \param MF  Function to process; the subtarget, instruction info, register
+  ///            info and MachineRegisterInfo are all taken from it.
+  /// \param VRM Virtual-to-physical register mapping.
+  /// \param LRM Live register matrix.
+  /// \param LIS Live interval analysis.
+  AMDGPUEliminateAGPRToVGPRCopyImpl(MachineFunction &MF, VirtRegMap &VRM,
+                                    LiveRegMatrix &LRM, LiveIntervals &LIS)
+      : ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
+        TRI(*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),
+        LIS(LIS) {}
+
+  /// \returns true if every use operand of \p Reg is the src0 or src1 operand
+  /// of an MFMA instruction.
+  bool areAllUsesCompatible(Register Reg) const;
+
+  /// Run the elimination over \p MF.
+  ///
+  /// \returns true if the function was modified; returns false immediately on
+  /// subtargets without GFX90A instructions or when AGPR0 is not in use.
+  bool run(MachineFunction &MF) const;
+};
+
+/// Check whether \p Reg is used exclusively as an MFMA src0/src1 operand.
+///
+/// \returns true when every use operand of \p Reg belongs to an MFMA
+/// instruction and is that instruction's src0 or src1 named operand (also
+/// true when \p Reg has no uses); false as soon as any other use is found.
+bool AMDGPUEliminateAGPRToVGPRCopyImpl::areAllUsesCompatible(
+    Register Reg) const {
+  for (const MachineOperand &UseMO : MRI.use_operands(Reg)) {
+    const MachineInstr &UserMI = *UseMO.getParent();
+
+    // Any non-MFMA user disqualifies the register outright.
+    if (!SIInstrInfo::isMFMA(UserMI))
+      return false;
+
+    // Within an MFMA, only the src0 and src1 slots are acceptable.
+    if (&UseMO == TII.getNamedOperand(UserMI, AMDGPU::OpName::src0))
+      continue;
+    if (&UseMO != TII.getNamedOperand(UserMI, AMDGPU::OpName::src1))
+      return false;
+  }
+  return true;
+}
+
+bool AMDGPUEliminateAGPRToVGPRCopyImpl::run(MachineFunction &MF) const {
+ // This only applies on subtargets that have a configurable AGPR vs. VGPR
+ // allocation.
+ if (!ST.hasGFX90AInsts())
+ return false;
+
+ // Early exit if no AGPRs were assigned.
+ if (!LRM.isPhysRegUsed(AMDGPU::AGPR0))
+ return false;
+
+ bool MadeChange = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &CopyMI : make_early_inc_range(MBB)) {
+ // Find full copies...
+ if (!CopyMI.isFullCopy())
+ continue;
+
+ // ... whose destination was mapped to a VGPR or AGPR...
+ Register DstReg = CopyMI.getOperand(0).getReg();
+ if (!DstReg.isVirtual())
+ continue;
+ Register DstPhysReg = VRM.getPhys(DstReg);
+ if (!DstPhysReg)
+ continue;
+ const TargetRegisterClass *DstRC = TRI.getPhysRegBaseClass(DstPhysReg);
+ if (!TRI.hasVectorRegisters(DstRC) || TRI.hasSGPRs(DstRC))
+ continue;
+
+ // ... and whose source was mapped to an AGPR.
+ Register SrcReg = CopyMI.getOperand(1).getReg();
+ if (!SrcReg.isVirtual() || SrcReg == DstReg)
+ continue;
+ Register SrcPhysReg = VRM.getPhys(SrcReg);
+ if (!SrcPhysReg)
+ continue;
+ const TargetRegisterClass *SrcRC = TRI.getPhysRegBaseClass(SrcPhysReg);
+ if (!TRI.isAGPRClass(SrcRC))
+ continue;
+
+ bool DstIsAGPR = TRI.hasAGPRs(DstRC);
+
+ LLVM_DEBUG({
+ dbgs() << "AGPR->AVGPR copy: " << CopyMI;
+ dbgs() << " "
+ << printReg(DstReg, &TRI, CopyMI.getOperand(0).getSubReg(), &MRI)
+ << " <-> " << printReg(DstPhysReg, &TRI, 0, &MRI) << "\n";
+ dbgs() << " "
+ << printReg(SrcReg, &TRI, CopyMI.getOperand(1).getSubReg(), &MRI)
+ << " <-> " << printReg(SrcPhysReg, &TRI, 0, &MRI) << "\n";
+ });
+
+ LiveInterval &SrcLI = LIS.getInterval(SrcReg);
+ const VNInfo *SrcVNI = SrcLI.getVNInfoAt(LIS.getInstructionIndex(CopyMI));
+ assert(SrcVNI && "VNI must exist");
+
+ bool AllUsesCompatible =
+ all_of(MRI.use_operands(DstReg), [&](const MachineOperand &MO) {
+ // Destination's use must be src0/src1 operands of an MFMA or
+ // another copy.
+ const MachineInstr &UseMI = *MO.getParent();
+ if (!DstIsAGPR) {
+ if (SIInstrInfo::isMFMA(UseMI)) {
+ if (&MO != TII.getNamedOperand(UseMI, AMDGPU::OpName::src0) &&
+ &MO != TII.getNamedOperand(UseMI, AMDGPU::OpName::src1)) {
+ LLVM_DEBUG(dbgs()
+ << " Incompatible MFMA operand: " << UseMI);
+ return false;
+ }
+ } else if (!UseMI.isFullCopy()) {
+ LLVM_DEBUG(dbgs() << " Incompatible user: " << UseMI);
+ return false;
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << " Skipping user check (dst is AGPR)\n");
+ }
+
+ // Source must be available at use point.
+ const VNInfo *UseVNI =
+ SrcLI.getVNInfoAt(LIS.getInstructionIndex(UseMI));
+ if (SrcVNI != UseVNI) {
+ LLVM_DEBUG(dbgs() << " AGPR no longer available at " << UseMI);
+ }
----------------
shiltian wrote:
```suggestion
if (SrcVNI != UseVNI)
LLVM_DEBUG(dbgs() << " AGPR no longer available at " << UseMI);
```
https://github.com/llvm/llvm-project/pull/153901
More information about the llvm-commits
mailing list