[llvm] d84c4e3 - AMDGPU: Add baseline register allocation failure test
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 10 10:12:47 PDT 2021
Author: Matt Arsenault
Date: 2021-08-10T13:12:34-04:00
New Revision: d84c4e385721ceb7fe3ef0bff88ed6a51a5337da
URL: https://github.com/llvm/llvm-project/commit/d84c4e385721ceb7fe3ef0bff88ed6a51a5337da
DIFF: https://github.com/llvm/llvm-project/commit/d84c4e385721ceb7fe3ef0bff88ed6a51a5337da.diff
LOG: AMDGPU: Add baseline register allocation failure test
Added:
    llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir

Modified:

Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir b/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
new file mode 100644
index 000000000000..af1f574733c5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
@@ -0,0 +1,114 @@
+# FIXME: The allocator emits an error on allocation failure, but it also produces verifier errors
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -start-before=greedy,0 -stop-after=greedy,0 -o - 2>&1 %s | FileCheck %s
+# CHECK: error: ran out of registers during register allocation
+
+---
+name: greedy_fail_alloc_sgpr1024_spill
+tracksRegLiveness: true
+frameInfo:
+ hasCalls: true
+machineFunctionInfo:
+ explicitKernArgSize: 16
+ maxKernArgAlign: 8
+ isEntryFunction: true
+ waveLimiter: true
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+ occupancy: 6
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15
+
+ %0:sgpr_64 = COPY $sgpr8_sgpr9
+ %1:sgpr_32 = COPY $sgpr15
+ %2:sgpr_32 = COPY $sgpr14
+ %3:sgpr_64 = COPY $sgpr10_sgpr11
+ %4:sgpr_64 = COPY $sgpr6_sgpr7
+ %5:sgpr_64 = COPY $sgpr4_sgpr5
+ %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+ undef %7.sub0:sgpr_1024 = S_MOV_B32 0
+ %7.sub1:sgpr_1024 = S_MOV_B32 0
+ %7.sub2:sgpr_1024 = S_MOV_B32 0
+ %7.sub3:sgpr_1024 = S_MOV_B32 0
+ %7.sub4:sgpr_1024 = S_MOV_B32 0
+ %7.sub5:sgpr_1024 = S_MOV_B32 0
+ %7.sub6:sgpr_1024 = S_MOV_B32 0
+ %7.sub7:sgpr_1024 = S_MOV_B32 0
+ %7.sub8:sgpr_1024 = S_MOV_B32 0
+ %7.sub9:sgpr_1024 = S_MOV_B32 0
+ %7.sub10:sgpr_1024 = S_MOV_B32 0
+ %7.sub11:sgpr_1024 = S_MOV_B32 0
+ %7.sub12:sgpr_1024 = S_MOV_B32 0
+ %7.sub13:sgpr_1024 = S_MOV_B32 0
+ %7.sub14:sgpr_1024 = S_MOV_B32 0
+ %7.sub15:sgpr_1024 = S_MOV_B32 0
+ %7.sub16:sgpr_1024 = S_MOV_B32 0
+ %7.sub17:sgpr_1024 = S_MOV_B32 0
+ %7.sub18:sgpr_1024 = S_MOV_B32 0
+ %7.sub19:sgpr_1024 = S_MOV_B32 0
+ %7.sub20:sgpr_1024 = S_MOV_B32 0
+ %8:sreg_64 = IMPLICIT_DEF
+ ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ dead $sgpr30_sgpr31 = SI_CALL %8, 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ $sgpr4_sgpr5 = COPY %5
+ $sgpr6_sgpr7 = COPY %4
+ $sgpr8_sgpr9 = COPY %0
+ $sgpr10_sgpr11 = COPY %3
+ $sgpr12 = COPY %2
+ $sgpr13 = COPY %1
+ dead $sgpr30_sgpr31 = SI_CALL %8, 0, csr_amdgpu_highregs, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ %9:sreg_64 = COPY $exec, implicit-def $exec
+ %10:sreg_64 = IMPLICIT_DEF
+
+ bb.1:
+ successors: %bb.2, %bb.4
+
+ %11:sreg_64 = COPY $exec, implicit-def $exec
+ S_CBRANCH_EXECZ %bb.4, implicit $exec
+
+ bb.2:
+ %12:vreg_1024 = COPY %7
+ %13:sreg_32 = S_LSHL_B32 %6.sub1, 1, implicit-def dead $scc
+ %12:vreg_1024 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32 %12, 0, %13, 3, implicit-def $m0, implicit $m0, implicit $exec
+
+ bb.3:
+ %14:sreg_64 = S_OR_SAVEEXEC_B64 %9, implicit-def $exec, implicit-def $scc, implicit $exec
+ undef %15.sub0:sgpr_1024 = COPY %7.sub0
+ %15.sub1:sgpr_1024 = COPY %7.sub0
+ %15.sub2:sgpr_1024 = COPY %7.sub0
+ %15.sub3:sgpr_1024 = COPY %7.sub0
+ %15.sub4:sgpr_1024 = COPY %7.sub0
+ %15.sub5:sgpr_1024 = COPY %7.sub0
+ %15.sub6:sgpr_1024 = COPY %7.sub0
+ %15.sub7:sgpr_1024 = COPY %7.sub0
+ %15.sub8:sgpr_1024 = COPY %7.sub0
+ %15.sub9:sgpr_1024 = COPY %7.sub0
+ %15.sub10:sgpr_1024 = COPY %7.sub0
+ %15.sub11:sgpr_1024 = COPY %7.sub0
+ %15.sub12:sgpr_1024 = COPY %7.sub0
+ %15.sub13:sgpr_1024 = COPY %7.sub0
+ %15.sub14:sgpr_1024 = COPY %7.sub0
+ %15.sub15:sgpr_1024 = COPY %7.sub0
+ %15.sub16:sgpr_1024 = COPY %7.sub0
+ %15.sub17:sgpr_1024 = COPY %7.sub0
+ %15.sub18:sgpr_1024 = COPY %7.sub0
+ %15.sub19:sgpr_1024 = COPY %7.sub0
+ %15.sub20:sgpr_1024 = COPY %7.sub0
+ %15.sub21:sgpr_1024 = COPY %7.sub0
+ ; Spill code ends up getting inserted here, and we end up with many unspillable sgpr1024 ranges
+ %16:vreg_1024 = COPY %15, implicit $exec
+ $exec = S_XOR_B64_term $exec, %14, implicit-def $scc
+ S_CBRANCH_EXECZ %bb.5, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.4:
+ $exec = S_OR_B64 $exec, %11, implicit-def $scc
+ %17:sreg_32 = S_LSHL_B32 %6.sub0, 1, implicit-def dead $scc
+ %16:vreg_1024 = COPY %7
+
+ bb.5:
+
+...
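For anyone wanting to reproduce the failure locally, the RUN line above expands to roughly the following (a sketch, assuming a built tree with llc, not, and FileCheck on PATH; the ",0" pass-instance index selects the first of the greedy runs, which at the time handled the SGPR allocation separately from VGPRs):

  not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 \
    -start-before=greedy,0 -stop-after=greedy,0 -o - 2>&1 \
    llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir | \
    FileCheck llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir

Equivalently, the test can be run through lit (the build directory path is an assumption about your local setup):

  build/bin/llvm-lit -v llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir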