[llvm-branch-commits] [llvm] AMDGPU: Handle rewriting VGPR MFMA fed from AGPR copy (PR #153022)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Aug 11 07:41:52 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
Changes:
Previously we handled only the inverse situation (copying a VGPR MFMA result into an AGPR).
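For context, a minimal sketch of the two copy directions, following the doc comments in the patch below (opcodes abridged, register names illustrative):

```
; Previously handled (tryFoldCopiesToAGPR): the VGPR MFMA result is copied into an AGPR.
%vdst:vgpr = V_MFMA_... %src0:av, %src1:av, %src2:vgpr
%agpr = COPY %vdst

; Newly handled (tryFoldCopiesFromAGPR): the MFMA src2 input is fed by a copy from an AGPR.
%src:vgpr = COPY %src:agpr
%vdst:vgpr = V_MFMA_... %src0:av, %src1:av, %src:vgpr
```

In both cases the pass attempts to reassign the transitive MFMA use/def chain to AGPRs and switch each MFMA to its AGPR-dst opcode, instead of leaving cross-bank copies behind.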
---
Patch is 34.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/153022.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp (+191-112)
- (modified) llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir (+9-68)
- (modified) llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll (+49-107)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
index 5206f32ec99e5..b71c70db5e6b3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
@@ -14,6 +14,10 @@
/// MFMA opcode.
///
/// TODO:
+/// - Handle rewrites of phis. This must be more careful than normal about the
+/// reassignment. We do not want to introduce an AGPR-to-AGPR copy inside of a
+/// loop, so it depends on the exact assignment of the copy.
+///
/// - Update LiveIntervals incrementally instead of recomputing from scratch
///
//===----------------------------------------------------------------------===//
@@ -60,6 +64,32 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
return TII.isMAI(MI) && AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()) != -1;
}
+ /// Find AV_* registers assigned to AGPRs (or virtual registers which were
+ /// already required to be AGPR).
+ ///
+ /// \return the assigned physical register that \p VReg is assigned to if it
+ /// is an AGPR, otherwise MCRegister().
+ MCRegister getAssignedAGPR(Register VReg) const {
+ MCRegister PhysReg = VRM.getPhys(VReg);
+ if (!PhysReg)
+ return MCRegister();
+
+ const TargetRegisterClass *VirtRegRC = MRI.getRegClass(VReg);
+ if (!TRI.hasAGPRs(VirtRegRC))
+ return MCRegister();
+
+ if (!TRI.hasVGPRs(VirtRegRC))
+ return PhysReg;
+
+ // If this is an AV register, we have to check if the actual assignment is
+ // to an AGPR
+ const TargetRegisterClass *AssignedRC = TRI.getPhysRegBaseClass(PhysReg);
+ return TRI.isAGPRClass(AssignedRC) ? PhysReg : MCRegister();
+ }
+
+ bool tryReassigningMFMAChain(MachineInstr &MFMA, unsigned HintOpIdx,
+ MCPhysReg PhysRegHint) const;
+
/// Compute the register class constraints based on the uses of \p Reg,
/// excluding MFMA uses from which can be rewritten to change the register
/// class constraint. This should be nearly identical to
@@ -74,6 +104,8 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
Register Reg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
SmallSetVector<Register, 4> &RewriteRegs) const;
+ bool tryFoldCopiesToAGPR(Register VReg, MCRegister AssignedAGPR) const;
+ bool tryFoldCopiesFromAGPR(Register VReg, MCRegister AssignedAGPR) const;
bool run(MachineFunction &MF) const;
};
@@ -152,6 +184,88 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
return true;
}
+bool AMDGPURewriteAGPRCopyMFMAImpl::tryReassigningMFMAChain(
+ MachineInstr &MFMA, unsigned HintOpIdx, MCPhysReg PhysRegHint) const {
+ // src2 and dst have the same physical class constraint; try to preserve
+ // the original src2 subclass if one were to exist.
+ SmallVector<MachineInstr *, 4> RewriteCandidates = {&MFMA};
+ SmallSetVector<Register, 4> RewriteRegs;
+
+ Register MFMAHintReg = MFMA.getOperand(HintOpIdx).getReg();
+ // Make sure we reassign the MFMA we found the copy from first. We want
+ // to ensure dst ends up in the physreg we were originally copying to.
+ RewriteRegs.insert(MFMAHintReg);
+
+ // We've found av = COPY (MFMA), and need to verify that we can trivially
+ // rewrite src2 to use the new AGPR. If we can't trivially replace it,
+ // we're going to induce as many copies as we would have emitted in the
+ // first place, as well as need to assign another register, and need to
+ // figure out where to put them. The live range splitting is smarter than
+ // anything we're doing here, so trust it did something reasonable.
+ //
+ // Note recomputeRegClassExceptRewritable will consider the constraints of
+ // this MFMA's src2 as well as the src2/dst of any transitive MFMA users.
+ if (!recomputeRegClassExceptRewritable(MFMAHintReg, RewriteCandidates,
+ RewriteRegs)) {
+ LLVM_DEBUG(dbgs() << "Could not recompute the regclass of dst reg "
+ << printReg(MFMAHintReg, &TRI) << '\n');
+ return false;
+ }
+
+ // If src2 and dst are different registers, we need to also reassign the
+ // input to an available AGPR if it is compatible with all other uses.
+ //
+ // If we can't reassign it, we'd need to introduce a different copy
+ // which is likely worse than the copy we'd be saving.
+ //
+ // It's likely that the MFMA is used in sequence with other MFMAs; if we
+ // cannot migrate the full use/def chain of MFMAs, we would need to
+ // introduce intermediate copies somewhere. So we only make the
+ // transform if all the interfering MFMAs can also be migrated. Collect
+ // the set of rewritable MFMAs and check if we can assign an AGPR at
+ // that point.
+ //
+ // If any of the MFMAs aren't reassignable, we give up and rollback to
+ // the original register assignments.
+
+ using RecoloringStack =
+ SmallVector<std::pair<const LiveInterval *, MCRegister>, 8>;
+ RecoloringStack TentativeReassignments;
+
+ for (Register RewriteReg : RewriteRegs) {
+ LiveInterval &LI = LIS.getInterval(RewriteReg);
+ TentativeReassignments.push_back({&LI, VRM.getPhys(RewriteReg)});
+ LRM.unassign(LI);
+ }
+
+ if (!attemptReassignmentsToAGPR(RewriteRegs, PhysRegHint)) {
+ // Roll back the register assignments to the original state.
+ for (auto [LI, OldAssign] : TentativeReassignments) {
+ if (VRM.hasPhys(LI->reg()))
+ LRM.unassign(*LI);
+ LRM.assign(*LI, OldAssign);
+ }
+
+ return false;
+ }
+
+ // Fixup the register classes of the virtual registers now that we've
+ // committed to the reassignments.
+ for (Register InterferingReg : RewriteRegs) {
+ const TargetRegisterClass *EquivalentAGPRRegClass =
+ TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
+ MRI.setRegClass(InterferingReg, EquivalentAGPRRegClass);
+ }
+
+ for (MachineInstr *RewriteCandidate : RewriteCandidates) {
+ int NewMFMAOp =
+ AMDGPU::getMFMASrcCVDstAGPROp(RewriteCandidate->getOpcode());
+ RewriteCandidate->setDesc(TII.get(NewMFMAOp));
+ }
+
+ return true;
+}
+
/// Attempt to reassign the registers in \p InterferingRegs to be AGPRs, with a
/// preference to use \p PhysReg first. Returns false if the reassignments
/// cannot be trivially performed.
@@ -204,6 +318,78 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::attemptReassignmentsToAGPR(
return true;
}
+/// Identify copies that look like:
+/// %vdst:vgpr = V_MFMA_.. %src0:av, %src1:av, %src2:vgpr
+/// %agpr = COPY %vgpr
+///
+/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
+/// versions of the MFMA. This should cover the common case.
+bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesToAGPR(
+ Register VReg, MCRegister AssignedAGPR) const {
+ bool MadeChange = false;
+ for (MachineInstr &UseMI : MRI.def_instructions(VReg)) {
+ if (!UseMI.isCopy())
+ continue;
+
+ Register CopySrcReg = UseMI.getOperand(1).getReg();
+ if (!CopySrcReg.isVirtual())
+ continue;
+
+ // TODO: Handle loop phis copied to AGPR. e.g.
+ //
+ // loop:
+ // %phi:vgpr = COPY %mfma:vgpr
+ // %mfma:vgpr = V_MFMA_xxx_vgprcd_e64 %a, %b, %phi
+ // s_cbranch_vccnz loop
+ //
+ // endloop:
+ // %agpr = mfma
+ //
+ // We need to be sure that %phi is assigned to the same physical register as
+ // %mfma, or else we will just be moving copies into the loop.
+
+ for (MachineInstr &CopySrcDefMI : MRI.def_instructions(CopySrcReg)) {
+ if (isRewriteCandidate(CopySrcDefMI) &&
+ tryReassigningMFMAChain(CopySrcDefMI, 0, AssignedAGPR))
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
+/// Identify copies that look like:
+/// %src:vgpr = COPY %src:agpr
+/// %vdst:vgpr = V_MFMA_... %src0:av, %src1:av, %src:vgpr
+///
+/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
+/// versions of the MFMA. This should cover rarer cases, and will generally be
+/// redundant with tryFoldCopiesToAGPR.
+bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR(
+ Register VReg, MCRegister AssignedAGPR) const {
+ bool MadeChange = false;
+ for (MachineInstr &UseMI : MRI.use_instructions(VReg)) {
+ if (!UseMI.isCopy())
+ continue;
+
+ Register CopyDstReg = UseMI.getOperand(0).getReg();
+ if (!CopyDstReg.isVirtual())
+ continue;
+
+ for (MachineInstr &CopyUseMI : MRI.use_instructions(CopyDstReg)) {
+ if (isRewriteCandidate(CopyUseMI)) {
+ const MachineOperand *Op =
+ CopyUseMI.findRegisterUseOperand(CopyDstReg, /*TRI=*/nullptr);
+ if (tryReassigningMFMAChain(CopyUseMI, Op->getOperandNo(),
+ VRM.getPhys(Op->getReg())))
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
// This only applies on subtargets that have a configurable AGPR vs. VGPR
// allocation.
@@ -220,121 +406,14 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
Register VReg = Register::index2VirtReg(I);
- Register PhysReg = VRM.getPhys(VReg);
- if (!PhysReg)
- continue;
-
- // Find AV_* registers assigned to AGPRs.
- const TargetRegisterClass *VirtRegRC = MRI.getRegClass(VReg);
- if (!TRI.hasAGPRs(VirtRegRC))
+ MCRegister AssignedAGPR = getAssignedAGPR(VReg);
+ if (!AssignedAGPR)
continue;
- const TargetRegisterClass *AssignedRC = VirtRegRC;
- if (TRI.hasVGPRs(VirtRegRC)) {
- // If this is an AV register, we have to check if the actual assignment is
- // to an AGPR
- AssignedRC = TRI.getPhysRegBaseClass(PhysReg);
- if (!TRI.isAGPRClass(AssignedRC))
- continue;
- }
-
- LiveInterval &LI = LIS.getInterval(VReg);
-
- for (VNInfo *VNI : LI.vnis()) {
- MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def);
- if (!DefMI || !DefMI->isCopy())
- continue;
-
- Register MFMADstReg = DefMI->getOperand(1).getReg();
- if (!MFMADstReg.isVirtual())
- continue;
-
- LiveInterval &CopySrcLI = LIS.getInterval(MFMADstReg);
- LiveQueryResult LRQ = CopySrcLI.Query(VNI->def.getRegSlot());
- MachineInstr *MFMA = LIS.getInstructionFromIndex(LRQ.valueIn()->def);
- if (!MFMA || !isRewriteCandidate(*MFMA))
- continue;
-
- // src2 and dst have the same physical class constraint; try to preserve
- // the original src2 subclass if one were to exist.
- SmallVector<MachineInstr *, 4> RewriteCandidates = {MFMA};
- SmallSetVector<Register, 4> RewriteRegs;
-
- // Make sure we reassign the MFMA we found the copy from first. We want
- // to ensure dst ends up in the physreg we were originally copying to.
- RewriteRegs.insert(MFMADstReg);
-
- // We've found av = COPY (MFMA), and need to verify that we can trivially
- // rewrite src2 to use the new AGPR. If we can't trivially replace it,
- // we're going to induce as many copies as we would have emitted in the
- // first place, as well as need to assign another register, and need to
- // figure out where to put them. The live range splitting is smarter than
- // anything we're doing here, so trust it did something reasonable.
- //
- // Note recomputeRegClassExceptRewritable will consider the constraints of
- // this MFMA's src2 as well as the src2/dst of any transitive MFMA users.
- if (!recomputeRegClassExceptRewritable(MFMADstReg, RewriteCandidates,
- RewriteRegs)) {
- LLVM_DEBUG(dbgs() << "Could not recompute the regclass of dst reg "
- << printReg(MFMADstReg, &TRI) << '\n');
- continue;
- }
-
- // If src2 and dst are different registers, we need to also reassign the
- // input to an available AGPR if it is compatible with all other uses.
- //
- // If we can't reassign it, we'd need to introduce a different copy
- // which is likely worse than the copy we'd be saving.
- //
- // It's likely that the MFMA is used in sequence with other MFMAs; if we
- // cannot migrate the full use/def chain of MFMAs, we would need to
- // introduce intermediate copies somewhere. So we only make the
- // transform if all the interfering MFMAs can also be migrated. Collect
- // the set of rewritable MFMAs and check if we can assign an AGPR at
- // that point.
- //
- // If any of the MFMAs aren't reassignable, we give up and rollback to
- // the original register assignments.
-
- using RecoloringStack =
- SmallVector<std::pair<const LiveInterval *, MCRegister>, 8>;
- RecoloringStack TentativeReassignments;
-
- for (Register RewriteReg : RewriteRegs) {
- LiveInterval &LI = LIS.getInterval(RewriteReg);
- TentativeReassignments.push_back({&LI, VRM.getPhys(RewriteReg)});
- LRM.unassign(LI);
- }
-
- if (!attemptReassignmentsToAGPR(RewriteRegs, PhysReg)) {
- // Roll back the register assignments to the original state.
- for (auto [LI, OldAssign] : TentativeReassignments) {
- if (VRM.hasPhys(LI->reg()))
- LRM.unassign(*LI);
- LRM.assign(*LI, OldAssign);
- }
-
- continue;
- }
-
- // Fixup the register classes of the virtual registers now that we've
- // committed to the reassignments.
- for (Register InterferingReg : RewriteRegs) {
- const TargetRegisterClass *EquivalentAGPRRegClass =
- TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
- MRI.setRegClass(InterferingReg, EquivalentAGPRRegClass);
- }
-
- for (MachineInstr *RewriteCandidate : RewriteCandidates) {
- int NewMFMAOp =
- AMDGPU::getMFMASrcCVDstAGPROp(RewriteCandidate->getOpcode());
- RewriteCandidate->setDesc(TII.get(NewMFMAOp));
- }
-
- // We likely left an identity copy behind after assignment; let
- // VirtRegRewriter deal with it later.
+ if (tryFoldCopiesToAGPR(VReg, AssignedAGPR))
+ MadeChange = true;
+ if (tryFoldCopiesFromAGPR(VReg, AssignedAGPR))
MadeChange = true;
- }
}
return MadeChange;
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir
index 7fdc8c0d8019b..632401b6128c5 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir
@@ -69,9 +69,9 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
- ; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[COPY3]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+ ; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[COPY3]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[V_MFMA_F64_4X4X4F64_e64_]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
; CHECK-NEXT: SI_RETURN
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
%1:av_64_align2 = COPY $vgpr0_vgpr1
@@ -97,8 +97,8 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
- ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[COPY3]].sub2_sub3, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[COPY3]].sub2_sub3, 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
; CHECK-NEXT: SI_RETURN
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
@@ -126,10 +126,10 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:areg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
- ; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub1:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
- ; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[COPY3]], 0, 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
+ ; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0:areg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub1:areg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
+ ; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[COPY3]], 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[V_MFMA_F64_4X4X4F64_e64_]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
; CHECK-NEXT: SI_RETURN
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
%1:av_64_align2 = COPY $vgpr0_vgpr1
@@ -200,62 +200,3 @@ body: |
GLOBAL_STORE_DWORDX4 %0, %4, 0, 0, implicit $exec :: (store (s128), addrspace 1)
SI_RETURN
...
-
-# Degenerate case. Copy from AGPR to VGPR is dead undef subreg def
----
-name: test_rewrite_mfma_copy_from_agpr_undef_vdst_subreg_use_imm_src2
-tracksRegLiveness: true
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
-
- ; CHECK-LABEL: name: test_rewrite_mfma_copy_from_agpr_undef_vdst_subreg_use_imm_src2
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
- ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
- ; CHECK-NEXT: dead [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
- ; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]].sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], 0, 0, 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
- ; CHECK-NEXT: SI_RETURN
- %0:vreg_64_align2 = COPY $vgpr4_vgpr5
- %1:av_64_align2 = COPY $vgpr0_vgpr1
- %2:av_64_align2 = COPY $vgpr2_vgpr3
- %3:areg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec :: (load (s128), addrspace 1)
- %4:vreg_128_align2 = COPY %3
- undef %4.sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, 0, 0, 0, 0, implicit $mode, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %4, 0, 0, implicit $exec :: (store (s128), addrspace 1)
- SI_RETURN
-...
-
-# Degenerate case. Copy from AGPR to VGPR is dead, but same register
-# is redefined as whole register.
----
-name...
[truncated]
``````````
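As a concrete illustration, the first test hunk above rewrites the copy-from-AGPR pattern into the AGPR form of the MFMA (a sketch abridged from the CHECK lines; virtual register numbers follow the test input):

```
; Before: src2 comes from a VGPR copy of an AGPR load result.
%3:areg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
%4:vreg_128_align2 = COPY %3
%5:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %4.sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec

; After: the chain is reassigned to AGPRs and the MFMA switches to its AGPR-dst opcode.
; The leftover AGPR-to-AGPR copy is likely an identity copy for VirtRegRewriter to clean up.
%4:areg_128_align2 = COPY %3
%5:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 %1, %2, %4.sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
```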
https://github.com/llvm/llvm-project/pull/153022