[llvm] AMDGPU: Add more baseline tests for VGPR MFMA rewrite pass (PR #148173)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 11 02:47:56 PDT 2025
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/148173
From 86969ba2f6c5f58bb4d05b392cafca412e630c34 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 11 Jul 2025 18:14:22 +0900
Subject: [PATCH] AMDGPU: Add more baseline tests for VGPR MFMA rewrite pass
Also includes some cosmetic renumbering of an existing test. More will
probably follow, since the pending patch has a lot of edge cases.
---
...class-vgpr-mfma-to-av-with-load-source.mir | 670 ++++++++++++++++++
.../inflate-reg-class-vgpr-mfma-to-av.mir | 86 +++
2 files changed, 756 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir b/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir
index 665d63476372f..ca2f6e22ea966 100644
--- a/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir
+++ b/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir
@@ -356,3 +356,673 @@ body: |
S_ENDPGM 0
...
+
+# The second MFMA has additional references to the rewritten register
+# %0 in src0/src1, not just src2.
+---
+name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_mac_vgprcd_e64_two_chained_uses_repeated_operand
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ stackPtrOffsetReg: '$sgpr32'
+ occupancy: 10
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_mac_vgprcd_e64_two_chained_uses_repeated_operand
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
+ ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr0_sgpr1
+ ; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
+ ; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $vcc, $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 killed $vgpr4_vgpr5, $vgpr2_vgpr3, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: liveins: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17:0x00000000FFFFFFFF
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ S_NOP 0, implicit-def $agpr0
+ renamable $sgpr0 = S_MOV_B32 0
+ undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr1 = COPY renamable $sgpr0
+ %1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
+ renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
+ %0.sub9:vreg_512_align2 = COPY %0.sub8
+
+ bb.1:
+ liveins: $vcc
+
+ undef %0.sub0_sub1:vreg_512_align2 = GLOBAL_LOAD_DWORDX2 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ %0:vreg_512_align2 = V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec
+ %0:vreg_512_align2 = V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %0.sub2_sub3, %0.sub0_sub1, %0, 0, 0, 0, implicit $mode, implicit $exec
+ S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ S_BRANCH %bb.2
+
+ bb.2:
+ ; No VGPRs available for %0
+ S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
+ S_ENDPGM 0
+
+...
+
+# There is a rewrite candidate, but it is used by another MFMA which
+# does not have a tied result.
+---
+name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_mac_vgprcd_e64_2nd_use_not_tied
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ stackPtrOffsetReg: '$sgpr32'
+ occupancy: 10
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_mac_vgprcd_e64_2nd_use_not_tied
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
+ ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
+ ; CHECK-NEXT: renamable $vgpr16_vgpr17 = COPY killed renamable $sgpr0_sgpr1
+ ; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
+ ; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $vcc, $vgpr16_vgpr17
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 $vgpr16_vgpr17, $vgpr16_vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr16_vgpr17, $vgpr16_vgpr17, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: liveins: $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33:0x00000000FFFFFFFF
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ S_NOP 0, implicit-def $agpr0
+ renamable $sgpr0 = S_MOV_B32 0
+ undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr1 = COPY renamable $sgpr0
+ %1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
+ renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
+ %0.sub9:vreg_512_align2 = COPY %0.sub8
+
+ bb.1:
+ liveins: $vcc
+
+ undef %0.sub0_sub1:vreg_512_align2 = GLOBAL_LOAD_DWORDX2 undef %2:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ %0:vreg_512_align2 = V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec
+ %3:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec
+ S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ S_BRANCH %bb.2
+
+ bb.2:
+ ; No VGPRs available for %0 or %3
+ S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ %4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ GLOBAL_STORE_DWORDX4_SADDR %4, %3.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %4, %3.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %4, %3.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %4, %3.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
+ S_ENDPGM 0
+
+...
+
+# Chain of two untied MFMAs: the second uses the result of the first as src2.
+---
+name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ stackPtrOffsetReg: '$sgpr32'
+ occupancy: 10
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
+ ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
+ ; CHECK-NEXT: renamable $vgpr18_vgpr19 = COPY killed renamable $sgpr0_sgpr1
+ ; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
+ ; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $vcc, $vgpr18_vgpr19
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $vgpr16_vgpr17 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ ; CHECK-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: early-clobber renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: liveins: $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35:0x00000000FFFFFFFF
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ S_NOP 0, implicit-def $agpr0
+ renamable $sgpr0 = S_MOV_B32 0
+ undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr1 = COPY renamable $sgpr0
+ %1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
+ renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
+ %0.sub9:vreg_512_align2 = COPY %0.sub8
+
+ bb.1:
+ liveins: $vcc
+
+ undef %0.sub0_sub1:vreg_512_align2 = GLOBAL_LOAD_DWORDX2 undef %2:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ %3:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec
+ %4:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %3, 0, 0, 0, implicit $mode, implicit $exec
+ S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ S_BRANCH %bb.2
+
+ bb.2:
+ ; No VGPRs available for %0 or %4
+ S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
+ S_ENDPGM 0
+
+...
+
+# There isn't an assignable AGPR around the first MFMA.
+---
+name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_no_agprs_first
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ stackPtrOffsetReg: '$sgpr32'
+ occupancy: 10
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_no_agprs_first
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
+ ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
+ ; CHECK-NEXT: renamable $vgpr18_vgpr19 = COPY killed renamable $sgpr0_sgpr1
+ ; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
+ ; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $vcc, $vgpr18_vgpr19
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $vgpr16_vgpr17 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
+ ; CHECK-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_NOP 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+ ; CHECK-NEXT: S_NOP 0, implicit $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+ ; CHECK-NEXT: S_NOP 0, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23
+ ; CHECK-NEXT: S_NOP 0, implicit $agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
+ ; CHECK-NEXT: S_NOP 0, implicit $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39
+ ; CHECK-NEXT: S_NOP 0, implicit $agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47
+ ; CHECK-NEXT: S_NOP 0, implicit $agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55
+ ; CHECK-NEXT: S_NOP 0, implicit $agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
+ ; CHECK-NEXT: early-clobber renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: liveins: $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35:0x00000000FFFFFFFF
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ S_NOP 0, implicit-def $agpr0
+ renamable $sgpr0 = S_MOV_B32 0
+ undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr1 = COPY renamable $sgpr0
+ %1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
+ renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
+ %0.sub9:vreg_512_align2 = COPY %0.sub8
+
+ bb.1:
+ liveins: $vcc
+
+ undef %0.sub0_sub1:vreg_512_align2 = GLOBAL_LOAD_DWORDX2 undef %2:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ S_NOP 0, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+ S_NOP 0, implicit-def $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+ S_NOP 0, implicit-def $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23
+ S_NOP 0, implicit-def $agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
+ S_NOP 0, implicit-def $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39
+ S_NOP 0, implicit-def $agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47
+ S_NOP 0, implicit-def $agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55
+ S_NOP 0, implicit-def $agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
+ %3:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec
+ S_NOP 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+ S_NOP 0, implicit $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+ S_NOP 0, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23
+ S_NOP 0, implicit $agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
+ S_NOP 0, implicit $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39
+ S_NOP 0, implicit $agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47
+ S_NOP 0, implicit $agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55
+ S_NOP 0, implicit $agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
+ %4:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %3, 0, 0, 0, implicit $mode, implicit $exec
+ S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ S_BRANCH %bb.2
+
+ bb.2:
+ ; No VGPRs available for %0 or %4
+ S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
+ S_ENDPGM 0
+
+...
+
+# There isn't an assignable AGPR around the second MFMA.
+---
+name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_no_agprs_second
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ stackPtrOffsetReg: '$sgpr32'
+ occupancy: 10
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_no_agprs_second
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
+ ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
+ ; CHECK-NEXT: renamable $vgpr18_vgpr19 = COPY killed renamable $sgpr0_sgpr1
+ ; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
+ ; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $vcc, $vgpr18_vgpr19
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $vgpr16_vgpr17 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ ; CHECK-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
+ ; CHECK-NEXT: early-clobber renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_NOP 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+ ; CHECK-NEXT: S_NOP 0, implicit $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+ ; CHECK-NEXT: S_NOP 0, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23
+ ; CHECK-NEXT: S_NOP 0, implicit $agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
+ ; CHECK-NEXT: S_NOP 0, implicit $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39
+ ; CHECK-NEXT: S_NOP 0, implicit $agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47
+ ; CHECK-NEXT: S_NOP 0, implicit $agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55
+ ; CHECK-NEXT: S_NOP 0, implicit $agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: liveins: $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35:0x00000000FFFFFFFF
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ S_NOP 0, implicit-def $agpr0
+ renamable $sgpr0 = S_MOV_B32 0
+ undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr1 = COPY renamable $sgpr0
+ %1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
+ renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
+ %0.sub9:vreg_512_align2 = COPY %0.sub8
+
+ bb.1:
+ liveins: $vcc
+
+ undef %0.sub0_sub1:vreg_512_align2 = GLOBAL_LOAD_DWORDX2 undef %2:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ %3:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec
+ S_NOP 0, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+ S_NOP 0, implicit-def $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+ S_NOP 0, implicit-def $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23
+ S_NOP 0, implicit-def $agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
+ S_NOP 0, implicit-def $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39
+ S_NOP 0, implicit-def $agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47
+ S_NOP 0, implicit-def $agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55
+ S_NOP 0, implicit-def $agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
+ %4:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %3, 0, 0, 0, implicit $mode, implicit $exec
+ S_NOP 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+ S_NOP 0, implicit $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+ S_NOP 0, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23
+ S_NOP 0, implicit $agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
+ S_NOP 0, implicit $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39
+ S_NOP 0, implicit $agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47
+ S_NOP 0, implicit $agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55
+ S_NOP 0, implicit $agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
+ S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ S_BRANCH %bb.2
+
+ bb.2:
+ ; No VGPRs available for %0 or %4
+ S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
+ S_ENDPGM 0
+
+...
+
+# Chain of 2 untied MFMAs, but the second reads the first's result in src0/src1 instead of src2.
+---
+name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_non_src2
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ stackPtrOffsetReg: '$sgpr32'
+ occupancy: 10
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_non_src2
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
+ ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
+ ; CHECK-NEXT: renamable $vgpr18_vgpr19 = COPY killed renamable $sgpr0_sgpr1
+ ; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
+ ; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $vcc, $vgpr18_vgpr19
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $vgpr16_vgpr17 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ ; CHECK-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: early-clobber renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr4_vgpr5, $vgpr8_vgpr9, undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: liveins: $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35:0x00000000FFFFFFFF
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ S_NOP 0, implicit-def $agpr0
+ renamable $sgpr0 = S_MOV_B32 0
+ undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr1 = COPY renamable $sgpr0
+ %1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
+ renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
+ %0.sub9:vreg_512_align2 = COPY %0.sub8
+
+ bb.1:
+ liveins: $vcc
+
+ undef %0.sub0_sub1:vreg_512_align2 = GLOBAL_LOAD_DWORDX2 undef %2:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ %3:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec
+ %4:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %3.sub4_sub5, %3.sub8_sub9, undef %6:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
+ S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ S_BRANCH %bb.2
+
+ bb.2:
+ ; No VGPRs available for %0 or %4
+ S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
+ S_ENDPGM 0
+
+...
+
+# Chain of 2 untied MFMAs, but the second MFMA has a different (smaller)
+# result size and reads a subregister of the first's result as src2.
+---
+name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_subreg
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ stackPtrOffsetReg: '$sgpr32'
+ occupancy: 10
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_subreg
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
+ ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
+ ; CHECK-NEXT: renamable $vgpr18_vgpr19 = COPY killed renamable $sgpr0_sgpr1
+ ; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
+ ; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $vcc, $vgpr18_vgpr19
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $vgpr16_vgpr17 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ ; CHECK-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ S_NOP 0, implicit-def $agpr0
+ renamable $sgpr0 = S_MOV_B32 0
+ undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr1 = COPY renamable $sgpr0
+ %1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
+ renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
+ %0.sub9:vreg_512_align2 = COPY %0.sub8
+
+ bb.1:
+ liveins: $vcc
+
+ undef %0.sub0_sub1:vreg_512_align2 = GLOBAL_LOAD_DWORDX2 undef %2:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ %3:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec
+ %4:vreg_128_align2 = V_MFMA_F32_16X16X16F16_vgprcd_e64 %1, %1, %3.sub2_sub3_sub4_sub5, 0, 0, 0, implicit $mode, implicit $exec
+ S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ S_BRANCH %bb.2
+
+ bb.2:
+ ; No VGPRs available for %0 or %4
+ S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ GLOBAL_STORE_DWORDX4_SADDR %5, %4, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av.mir b/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av.mir
index 33554f8525101..ee0d729e2abbf 100644
--- a/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av.mir
+++ b/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av.mir
@@ -188,3 +188,89 @@ body: |
S_ENDPGM 0
...
+
+---
+name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_mac_vgprcd_e64_two_chained_uses
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ stackPtrOffsetReg: '$sgpr32'
+ occupancy: 10
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_mac_vgprcd_e64_two_chained_uses
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
+ ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr0_sgpr1
+ ; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $vgpr11 = COPY renamable $vgpr10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $vcc, $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17:0x00000000000F0000
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_MUL_F32 8, $vgpr10_vgpr11, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: liveins: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17:0x00000000FFFFFFFF
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ S_NOP 0, implicit-def $agpr0
+ renamable $sgpr0 = S_MOV_B32 0
+ undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr1 = COPY renamable $sgpr0
+ %1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
+ renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
+ %0.sub9:vreg_512_align2 = COPY %0.sub8
+
+ bb.1:
+ liveins: $vcc
+
+ undef %0.sub0_sub1:vreg_512_align2 = nofpexcept V_PK_MUL_F32 8, %0.sub8_sub9, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ %0:vreg_512_align2 = V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec
+ %0:vreg_512_align2 = V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec
+ S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ S_BRANCH %bb.2
+
+ bb.2:
+ ; No VGPRs available for %0
+ S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
+ S_ENDPGM 0
+
+...
More information about the llvm-commits mailing list