[llvm-branch-commits] [llvm] AMDGPU: Handle rewriting VGPR MFMAs with immediate src2 (PR #153016)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Aug 11 07:35:00 PDT 2025
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/153016
None
>From c289a31b73dfef918f2155937afbf73ee353ee8d Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 25 Jul 2025 10:23:31 +0900
Subject: [PATCH] AMDGPU: Handle rewriting VGPR MFMAs with immediate src2
---
.../AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp | 18 ++--
...class-vgpr-mfma-to-agpr-negative-tests.mir | 87 -------------------
...class-vgpr-mfma-to-av-with-load-source.mir | 16 ++--
.../AMDGPU/rewrite-vgpr-mfma-to-agpr.ll | 38 +-------
4 files changed, 18 insertions(+), 141 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
index 8c6aee89b7375..a8dfdbe5dd494 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
@@ -242,12 +242,12 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
continue;
MachineOperand *Src2 = TII.getNamedOperand(*MFMA, AMDGPU::OpName::src2);
- if (!Src2->isReg())
- continue;
-
- Register Src2Reg = Src2->getReg();
- if (!Src2Reg.isVirtual())
- continue;
+ Register Src2Reg;
+ if (Src2->isReg()) {
+ Src2Reg = Src2->getReg();
+ if (!Src2Reg.isVirtual())
+ continue;
+ }
// FIXME: getMinimalPhysRegClass returns a nonsense AV_* subclass instead
// of an AGPR or VGPR subclass, so we can't simply use the result on the
@@ -256,13 +256,15 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
LLVM_DEBUG({
dbgs() << "Attempting to replace VGPR MFMA with AGPR version:"
<< " Dst=[" << printReg(VReg) << " => "
- << printReg(PhysReg, &TRI) << ']';
+ << printReg(PhysReg, &TRI);
if (Src2Reg) {
Register Src2PhysReg = VRM.getPhys(Src2Reg);
dbgs() << ", Src2=[" << printReg(Src2Reg, &TRI) << " => "
- << printReg(Src2PhysReg, &TRI) << "]: " << *MFMA;
+ << printReg(Src2PhysReg, &TRI);
}
+
+ dbgs() << "]: " << *MFMA;
});
const TargetRegisterClass *DstVirtRegRC = MRI.getRegClass(MFMADstReg);
diff --git a/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-agpr-negative-tests.mir b/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-agpr-negative-tests.mir
index b53bde6bfd281..3103d635200c6 100644
--- a/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-agpr-negative-tests.mir
+++ b/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-agpr-negative-tests.mir
@@ -16,10 +16,6 @@
ret void
}
- define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2() #0 {
- ret void
- }
-
define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_physreg_src2() #0 {
ret void
}
@@ -341,89 +337,6 @@ body: |
...
-# Non-mac variant, src2 is an immediate.
----
-name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2
-tracksRegLiveness: true
-machineFunctionInfo:
- isEntryFunction: true
- stackPtrOffsetReg: '$sgpr32'
- occupancy: 10
- sgprForEXECCopy: '$sgpr100_sgpr101'
-body: |
- ; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
- ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
- ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr0_sgpr1
- ; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
- ; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK-NEXT: liveins: $vcc, $vgpr0_vgpr1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: early-clobber renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
- ; CHECK-NEXT: S_BRANCH %bb.2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: liveins: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17:0x00000000FFFFFFFF
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17
- ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
- ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
- ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
- ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
- ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
- ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
- ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
- ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
- ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
- ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
- ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
- ; CHECK-NEXT: S_ENDPGM 0
- bb.0:
- S_NOP 0, implicit-def $agpr0
- renamable $sgpr0 = S_MOV_B32 0
- undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
- renamable $sgpr1 = COPY renamable $sgpr0
- %1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
- renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
- %0.sub9:vreg_512_align2 = COPY %0.sub8
-
- bb.1:
- liveins: $vcc
-
- %0:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, 0, 0, 0, 0, implicit $mode, implicit $exec
- S_CBRANCH_VCCNZ %bb.1, implicit $vcc
- S_BRANCH %bb.2
-
- bb.2:
- ; No VGPRs available for %0
- S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
- S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
- S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
- S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
- S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
- S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
- S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
- %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
- GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
- GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
- GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
- S_ENDPGM 0
-
-...
-
# Non-mac variant, src2 is a physical register
---
name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_physreg_src2
diff --git a/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir b/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir
index dcb45cc90ca68..3de86da766af7 100644
--- a/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir
+++ b/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir
@@ -1553,19 +1553,18 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $vcc, $vgpr18_vgpr19
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $vgpr16_vgpr17 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
- ; CHECK-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: early-clobber renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: renamable $vgpr20_vgpr21_vgpr22_vgpr23 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr20_vgpr21, $vgpr18_vgpr19, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: renamable $agpr0_agpr1 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ ; CHECK-NEXT: early-clobber renamable $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X8F16_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X8F16_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, killed $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X4F16_e64 $agpr0_agpr1, $vgpr18_vgpr19, 0, 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
; CHECK-NEXT: S_BRANCH %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: liveins: $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35:0x00000000FFFFFFFF
+ ; CHECK-NEXT: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15:0x00000000FFFFFFFF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
@@ -1903,14 +1902,13 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $vcc, $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: early-clobber renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X8F16_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
; CHECK-NEXT: S_BRANCH %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: liveins: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17:0x00000000FFFFFFFF
+ ; CHECK-NEXT: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15:0x00000000FFFFFFFF
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
index 66002fff12155..4bc0270e61a7a 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
@@ -235,47 +235,11 @@ bb:
ret void
}
-; TODO: Handle rewriting this case
define void @test_rewrite_mfma_imm_src2(float %arg0, float %arg1) #0 {
; CHECK-LABEL: test_rewrite_mfma_imm_src2:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, 2.0
-; CHECK-NEXT: s_nop 7
-; CHECK-NEXT: s_nop 7
-; CHECK-NEXT: s_nop 1
-; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
-; CHECK-NEXT: v_accvgpr_write_b32 a1, v1
-; CHECK-NEXT: v_accvgpr_write_b32 a2, v2
-; CHECK-NEXT: v_accvgpr_write_b32 a3, v3
-; CHECK-NEXT: v_accvgpr_write_b32 a4, v4
-; CHECK-NEXT: v_accvgpr_write_b32 a5, v5
-; CHECK-NEXT: v_accvgpr_write_b32 a6, v6
-; CHECK-NEXT: v_accvgpr_write_b32 a7, v7
-; CHECK-NEXT: v_accvgpr_write_b32 a8, v8
-; CHECK-NEXT: v_accvgpr_write_b32 a9, v9
-; CHECK-NEXT: v_accvgpr_write_b32 a10, v10
-; CHECK-NEXT: v_accvgpr_write_b32 a11, v11
-; CHECK-NEXT: v_accvgpr_write_b32 a12, v12
-; CHECK-NEXT: v_accvgpr_write_b32 a13, v13
-; CHECK-NEXT: v_accvgpr_write_b32 a14, v14
-; CHECK-NEXT: v_accvgpr_write_b32 a15, v15
-; CHECK-NEXT: v_accvgpr_write_b32 a16, v16
-; CHECK-NEXT: v_accvgpr_write_b32 a17, v17
-; CHECK-NEXT: v_accvgpr_write_b32 a18, v18
-; CHECK-NEXT: v_accvgpr_write_b32 a19, v19
-; CHECK-NEXT: v_accvgpr_write_b32 a20, v20
-; CHECK-NEXT: v_accvgpr_write_b32 a21, v21
-; CHECK-NEXT: v_accvgpr_write_b32 a22, v22
-; CHECK-NEXT: v_accvgpr_write_b32 a23, v23
-; CHECK-NEXT: v_accvgpr_write_b32 a24, v24
-; CHECK-NEXT: v_accvgpr_write_b32 a25, v25
-; CHECK-NEXT: v_accvgpr_write_b32 a26, v26
-; CHECK-NEXT: v_accvgpr_write_b32 a27, v27
-; CHECK-NEXT: v_accvgpr_write_b32 a28, v28
-; CHECK-NEXT: v_accvgpr_write_b32 a29, v29
-; CHECK-NEXT: v_accvgpr_write_b32 a30, v30
-; CHECK-NEXT: v_accvgpr_write_b32 a31, v31
+; CHECK-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, 2.0
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use a[0:31]
; CHECK-NEXT: ;;#ASMEND
More information about the llvm-branch-commits
mailing list