[llvm] d883d5f - AMDGPU: Add testcase with bad regalloc behavior

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 18 01:47:03 PDT 2025


Author: Matt Arsenault
Date: 2025-07-18T17:46:54+09:00
New Revision: d883d5fecf8aa7db6daa0b163599d42ca00c5808

URL: https://github.com/llvm/llvm-project/commit/d883d5fecf8aa7db6daa0b163599d42ca00c5808
DIFF: https://github.com/llvm/llvm-project/commit/d883d5fecf8aa7db6daa0b163599d42ca00c5808.diff

LOG: AMDGPU: Add testcase with bad regalloc behavior

This demonstrates poor register allocation caused by not ordering
the AV register classes relative to the A and V classes.

Added: 
    llvm/test/CodeGen/AMDGPU/bad-agpr-vgpr-regalloc-priority.mir

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/bad-agpr-vgpr-regalloc-priority.mir b/llvm/test/CodeGen/AMDGPU/bad-agpr-vgpr-regalloc-priority.mir
new file mode 100644
index 0000000000000..1a457c94778fd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/bad-agpr-vgpr-regalloc-priority.mir
@@ -0,0 +1,74 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -start-before=greedy,2 -stop-after=virtregrewriter,2 -o - %s | FileCheck %s
+
+---
+name:            bad_ra
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sgpr_64, preferred-register: '$sgpr4_sgpr5' }
+  - { id: 1, class: sgpr_128, preferred-register: '%2' }
+  - { id: 2, class: areg_128, preferred-register: '%1' }
+  - { id: 3, class: areg_128, preferred-register: '%4' }
+  - { id: 4, class: av_128, preferred-register: '%3' }
+  - { id: 5, class: areg_128, preferred-register: '%6' }
+  - { id: 6, class: vreg_128, preferred-register: '%5' }
+  - { id: 7, class: areg_128, preferred-register: '%4' }
+  - { id: 8, class: vgpr_32 }
+  - { id: 9, class: vgpr_32 }
+  - { id: 10, class: vgpr_32 }
+  - { id: 11, class: areg_128 }
+liveins:
+  - { reg: '$sgpr4_sgpr5', virtual-reg: '%0' }
+frameInfo:
+  maxAlignment:    1
+  isCalleeSavedInfoValid: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+  occupancy:       10
+  vgprForAGPRCopy: '$vgpr255'
+  sgprForEXECCopy: '$sgpr100_sgpr101'
+body:             |
+  bb.0:
+    liveins: $sgpr4_sgpr5
+
+    ; CHECK-LABEL: name: bad_ra
+    ; CHECK: liveins: $sgpr4_sgpr5
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: early-clobber renamable $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM_ec renamable $sgpr4_sgpr5, 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+    ; CHECK-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr6_sgpr7, 0, 0 :: ("amdgpu-noclobber" load (s128), addrspace 1)
+    ; CHECK-NEXT: renamable $vgpr4 = V_MOV_B32_e32 1065353216, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr5 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr6 = V_MOV_B32_e32 1073741824, implicit $exec
+    ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr4, $vgpr6, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr1 = COPY renamable $agpr1
+    ; CHECK-NEXT: renamable $vgpr0 = COPY renamable $agpr0
+    ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr4, $vgpr6, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr3 = COPY renamable $agpr1
+    ; CHECK-NEXT: renamable $vgpr2 = COPY killed renamable $agpr0
+    ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 killed $vgpr4, killed $vgpr6, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3
+    ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr5, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr6_sgpr7, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+    ; CHECK-NEXT: S_ENDPGM 0
+    early-clobber renamable $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM_ec killed renamable $sgpr4_sgpr5, 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+    renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr6_sgpr7, 0, 0 :: ("amdgpu-noclobber" load (s128), addrspace 1)
+    %8:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %10:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
+    %2:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
+    %3:areg_128 = V_MFMA_F32_4X4X1F32_e64 %8, %10, %2, 0, 0, 0, implicit $mode, implicit $exec
+    undef %4.sub1:av_128 = COPY %3.sub1
+    %4.sub0:av_128 = COPY %3.sub0
+    %11:areg_128 = V_MFMA_F32_4X4X1F32_e64 %8, %10, %3, 0, 0, 0, implicit $mode, implicit $exec
+    %4.sub3:av_128 = COPY %11.sub1
+    %4.sub2:av_128 = COPY %11.sub0
+    %7:areg_128 = COPY %4
+    %5:areg_128 = V_MFMA_F32_4X4X1F32_e64 %8, %10, %7, 0, 0, 0, implicit $mode, implicit $exec
+    %6:vreg_128 = COPY %5
+    GLOBAL_STORE_DWORDX4_SADDR %9, %6, killed renamable $sgpr6_sgpr7, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+    S_ENDPGM 0
+
+...


        


More information about the llvm-commits mailing list