[llvm] [AMDGPU] Fix AGPR_32 reg assign for mfma scale ops (PR #168964)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 2 09:25:40 PST 2025
https://github.com/hjagasiaAMD updated https://github.com/llvm/llvm-project/pull/168964
>From 1c2072fa094775a827144b9ce410f8df96faaf5b Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Thu, 20 Nov 2025 16:09:08 -0600
Subject: [PATCH 1/4] [AMDGPU] Fix AGPR_32 reg assign for mfma scale ops
In the MFMA rewrite pass, prevent scale operands from being assigned the
AGPR_32 register class, which the instruction format does not permit.
---
llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp | 4 ++++
.../test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir | 6 +++---
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
index 89c16dadb4b41..b5e3187289160 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
@@ -302,6 +302,10 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::attemptReassignmentsToAGPR(
const TargetRegisterClass *EquivalentAGPRRegClass =
TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
+ // Do not reassign scale operands
+ if (EquivalentAGPRRegClass == &AMDGPU::AGPR_32RegClass)
+ return false;
+
MCPhysReg Assignable = AMDGPU::NoRegister;
if (EquivalentAGPRRegClass->contains(PrefPhysReg) &&
LRM.checkInterference(ReassignLI, PrefPhysReg) ==
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir
index ab56c9982753f..12be806960b67 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir
@@ -1,6 +1,6 @@
-# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=greedy,amdgpu-rewrite-agpr-copy-mfma -verify-machineinstrs -o - %s 2>&1 | FileCheck %s
-# CHECK: Illegal virtual register for instruction
-# CHECK: Expected a VGPR_32 register, but got a AGPR_32 register
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=greedy,amdgpu-rewrite-agpr-copy-mfma -verify-machineinstrs -o - %s 2>&1 | FileCheck %s
+# CHECK-NOT: Illegal virtual register for instruction
+# CHECK-NOT: Expected a VGPR_32 register, but got a AGPR_32 register
# Test for issue in amdgpu-rewrite-agpr-copy-mfma, which reassigns scale operand
# in vgpr_32 register to agpr_32, not permitted by instruction format.
>From d9405f21486a7a66f5770b9264f0c1ec45819800 Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Wed, 26 Nov 2025 16:22:38 -0600
Subject: [PATCH 2/4] Check operand constraints and update mir checks.
---
.../AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp | 29 ++++++++++++++-----
.../rewrite-vgpr-mfma-scale-to-agpr.mir | 8 +++--
2 files changed, 26 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
index b5e3187289160..d957e8e6f85a6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
@@ -96,8 +96,8 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
/// Compute the register class constraints based on the uses of \p Reg,
/// excluding MFMA uses from which can be rewritten to change the register
- /// class constraint. This should be nearly identical to
- /// MachineRegisterInfo::recomputeRegClass.
+ /// class constraint. MFMA scale operands need to be constraint checked.
+ /// This should be nearly identical to MachineRegisterInfo::recomputeRegClass.
/// \p RewriteCandidates will collect the set of MFMA instructions that need
/// to have the opcode mutated to perform the replacement.
@@ -151,9 +151,26 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
// We can swap the classes of dst + src2 as a pair to AGPR, so ignore the
// effects of rewrite candidates. It just so happens that we can use
- // either AGPR or VGPR in src0/src1, so don't bother checking the
- // constraint effects of the individual operands.
+ // either AGPR or VGPR in src0/src1. We still need to check constraint
+ // effects for scale variant, which does not allow AGPR.
if (isRewriteCandidate(*MI)) {
+
+ int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode());
+ MachineInstrBuilder TmpMIB =
+ BuildMI(*MI->getParent(), MI->getIterator(), MI->getDebugLoc(),
+ TII.get(AGPROp));
+ for (const MachineOperand &TmpMO : MI->operands())
+ TmpMIB.add(TmpMO);
+ MachineInstr *TmpMI = TmpMIB.getInstr();
+ unsigned OpNo = &MO - &MI->getOperand(0);
+ const TargetRegisterClass *EquivalentAGPRRegClass =
+ TRI.getEquivalentAGPRClass(MRI.getRegClass(Reg));
+ const TargetRegisterClass *Allowed = TmpMI->getRegClassConstraintEffect(
+ OpNo, EquivalentAGPRRegClass, &TII, &TRI);
+ TmpMI->eraseFromParent();
+ if (!Allowed || Allowed != EquivalentAGPRRegClass)
+ return false;
+
const MachineOperand *VDst =
TII.getNamedOperand(*MI, AMDGPU::OpName::vdst);
const MachineOperand *Src2 =
@@ -302,10 +319,6 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::attemptReassignmentsToAGPR(
const TargetRegisterClass *EquivalentAGPRRegClass =
TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
- // Do not reassign scale operands
- if (EquivalentAGPRRegClass == &AMDGPU::AGPR_32RegClass)
- return false;
-
MCPhysReg Assignable = AMDGPU::NoRegister;
if (EquivalentAGPRRegClass->contains(PrefPhysReg) &&
LRM.checkInterference(ReassignLI, PrefPhysReg) ==
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir
index 12be806960b67..e8c835c76a374 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir
@@ -1,7 +1,9 @@
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=greedy,amdgpu-rewrite-agpr-copy-mfma -verify-machineinstrs -o - %s 2>&1 | FileCheck %s
-# CHECK-NOT: Illegal virtual register for instruction
-# CHECK-NOT: Expected a VGPR_32 register, but got a AGPR_32 register
-
+# CHECK: bb.1:
+# CHECK: dead %{{[0-9]+}}:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, 4, 4, %{{[0-9]+}}, %[[REG:[0-9]+]], 4, 0, implicit $mode, implicit $exec
+# CHECK: %{{[0-9]+}}:agpr_32 = IMPLICIT_DEF
+# CHECK: %[[REG]]:vgpr_32 = COPY %{{[0-9]+}}
+
# Test for issue in amdgpu-rewrite-agpr-copy-mfma, which reassigns scale operand
# in vgpr_32 register to agpr_32, not permitted by instruction format.
---
>From 08fc310ca9d82acc03f516117cf2e01f65b8189b Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Mon, 1 Dec 2025 21:07:33 -0600
Subject: [PATCH 3/4] Get the static constraint of the known operand.
---
.../AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp | 18 ++++--------------
1 file changed, 4 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
index db3ce200d2aa7..fea4b517db960 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
@@ -155,21 +155,11 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
// either AGPR or VGPR in src0/src1. We still need to check constraint
// effects for scale variant, which does not allow AGPR.
if (isRewriteCandidate(*MI)) {
-
- int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode());
- MachineInstrBuilder TmpMIB =
- BuildMI(*MI->getParent(), MI->getIterator(), MI->getDebugLoc(),
- TII.get(AGPROp));
- for (const MachineOperand &TmpMO : MI->operands())
- TmpMIB.add(TmpMO);
- MachineInstr *TmpMI = TmpMIB.getInstr();
unsigned OpNo = &MO - &MI->getOperand(0);
- const TargetRegisterClass *EquivalentAGPRRegClass =
- TRI.getEquivalentAGPRClass(MRI.getRegClass(Reg));
- const TargetRegisterClass *Allowed = TmpMI->getRegClassConstraintEffect(
- OpNo, EquivalentAGPRRegClass, &TII, &TRI);
- TmpMI->eraseFromParent();
- if (!Allowed || Allowed != EquivalentAGPRRegClass)
+ int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode());
+ const MCInstrDesc &AGPRDesc = TII.get(AGPROp);
+ const TargetRegisterClass *NewRC = TII.getRegClass(AGPRDesc, OpNo);
+ if (!NewRC || !TRI.hasAGPRs(NewRC))
return false;
const MachineOperand *VDst =
>From aff0f88007a63151c3f1029c3e84803b686a3f35 Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Tue, 2 Dec 2025 11:25:31 -0600
Subject: [PATCH 4/4] Update
llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
index fea4b517db960..5769deb98db79 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
@@ -159,7 +159,7 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode());
const MCInstrDesc &AGPRDesc = TII.get(AGPROp);
const TargetRegisterClass *NewRC = TII.getRegClass(AGPRDesc, OpNo);
- if (!NewRC || !TRI.hasAGPRs(NewRC))
+ if (!TRI.hasAGPRs(NewRC))
return false;
const MachineOperand *VDst =
More information about the llvm-commits
mailing list