[llvm] d883d5f - AMDGPU: Add testcase with bad regalloc behavior
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 18 01:47:03 PDT 2025
Author: Matt Arsenault
Date: 2025-07-18T17:46:54+09:00
New Revision: d883d5fecf8aa7db6daa0b163599d42ca00c5808
URL: https://github.com/llvm/llvm-project/commit/d883d5fecf8aa7db6daa0b163599d42ca00c5808
DIFF: https://github.com/llvm/llvm-project/commit/d883d5fecf8aa7db6daa0b163599d42ca00c5808.diff
LOG: AMDGPU: Add testcase with bad regalloc behavior
This demonstrates poor allocation due to not ordering
AV classes relative to the A and V classes
Added:
llvm/test/CodeGen/AMDGPU/bad-agpr-vgpr-regalloc-priority.mir
Modified:
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/bad-agpr-vgpr-regalloc-priority.mir b/llvm/test/CodeGen/AMDGPU/bad-agpr-vgpr-regalloc-priority.mir
new file mode 100644
index 0000000000000..1a457c94778fd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/bad-agpr-vgpr-regalloc-priority.mir
@@ -0,0 +1,74 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -start-before=greedy,2 -stop-after=virtregrewriter,2 -o - %s | FileCheck %s
+
+---
+name: bad_ra
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sgpr_64, preferred-register: '$sgpr4_sgpr5' }
+ - { id: 1, class: sgpr_128, preferred-register: '%2' }
+ - { id: 2, class: areg_128, preferred-register: '%1' }
+ - { id: 3, class: areg_128, preferred-register: '%4' }
+ - { id: 4, class: av_128, preferred-register: '%3' }
+ - { id: 5, class: areg_128, preferred-register: '%6' }
+ - { id: 6, class: vreg_128, preferred-register: '%5' }
+ - { id: 7, class: areg_128, preferred-register: '%4' }
+ - { id: 8, class: vgpr_32 }
+ - { id: 9, class: vgpr_32 }
+ - { id: 10, class: vgpr_32 }
+ - { id: 11, class: areg_128 }
+liveins:
+ - { reg: '$sgpr4_sgpr5', virtual-reg: '%0' }
+frameInfo:
+ maxAlignment: 1
+ isCalleeSavedInfoValid: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ occupancy: 10
+ vgprForAGPRCopy: '$vgpr255'
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ bb.0:
+ liveins: $sgpr4_sgpr5
+
+ ; CHECK-LABEL: name: bad_ra
+ ; CHECK: liveins: $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: early-clobber renamable $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM_ec renamable $sgpr4_sgpr5, 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; CHECK-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr6_sgpr7, 0, 0 :: ("amdgpu-noclobber" load (s128), addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr4 = V_MOV_B32_e32 1065353216, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr5 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr6 = V_MOV_B32_e32 1073741824, implicit $exec
+ ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr4, $vgpr6, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr1 = COPY renamable $agpr1
+ ; CHECK-NEXT: renamable $vgpr0 = COPY renamable $agpr0
+ ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr4, $vgpr6, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr3 = COPY renamable $agpr1
+ ; CHECK-NEXT: renamable $vgpr2 = COPY killed renamable $agpr0
+ ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 killed $vgpr4, killed $vgpr6, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr5, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr6_sgpr7, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: S_ENDPGM 0
+ early-clobber renamable $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM_ec killed renamable $sgpr4_sgpr5, 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr6_sgpr7, 0, 0 :: ("amdgpu-noclobber" load (s128), addrspace 1)
+ %8:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %10:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
+ %2:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
+ %3:areg_128 = V_MFMA_F32_4X4X1F32_e64 %8, %10, %2, 0, 0, 0, implicit $mode, implicit $exec
+ undef %4.sub1:av_128 = COPY %3.sub1
+ %4.sub0:av_128 = COPY %3.sub0
+ %11:areg_128 = V_MFMA_F32_4X4X1F32_e64 %8, %10, %3, 0, 0, 0, implicit $mode, implicit $exec
+ %4.sub3:av_128 = COPY %11.sub1
+ %4.sub2:av_128 = COPY %11.sub0
+ %7:areg_128 = COPY %4
+ %5:areg_128 = V_MFMA_F32_4X4X1F32_e64 %8, %10, %7, 0, 0, 0, implicit $mode, implicit $exec
+ %6:vreg_128 = COPY %5
+ GLOBAL_STORE_DWORDX4_SADDR %9, %6, killed renamable $sgpr6_sgpr7, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ S_ENDPGM 0
+
+...
More information about the llvm-commits
mailing list