[llvm] d84c4e3 - AMDGPU: Add baseline register allocation failure test
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 10 10:12:47 PDT 2021
Author: Matt Arsenault
Date: 2021-08-10T13:12:34-04:00
New Revision: d84c4e385721ceb7fe3ef0bff88ed6a51a5337da
URL: https://github.com/llvm/llvm-project/commit/d84c4e385721ceb7fe3ef0bff88ed6a51a5337da
DIFF: https://github.com/llvm/llvm-project/commit/d84c4e385721ceb7fe3ef0bff88ed6a51a5337da.diff
LOG: AMDGPU: Add baseline register allocation failure test
Added:
    llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir

Modified:

Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir b/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
new file mode 100644
index 000000000000..af1f574733c5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
@@ -0,0 +1,114 @@
+# FIXME: The allocator emits an error on allocation failure, but it also produces verifier errors
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -start-before=greedy,0 -stop-after=greedy,0 -o - 2>&1 %s | FileCheck %s
+# CHECK: error: ran out of registers during register allocation
+
+---
+name: greedy_fail_alloc_sgpr1024_spill
+tracksRegLiveness: true
+frameInfo:
+ hasCalls: true
+machineFunctionInfo:
+ explicitKernArgSize: 16
+ maxKernArgAlign: 8
+ isEntryFunction: true
+ waveLimiter: true
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+ occupancy: 6
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15
+
+ %0:sgpr_64 = COPY $sgpr8_sgpr9
+ %1:sgpr_32 = COPY $sgpr15
+ %2:sgpr_32 = COPY $sgpr14
+ %3:sgpr_64 = COPY $sgpr10_sgpr11
+ %4:sgpr_64 = COPY $sgpr6_sgpr7
+ %5:sgpr_64 = COPY $sgpr4_sgpr5
+ %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+ undef %7.sub0:sgpr_1024 = S_MOV_B32 0
+ %7.sub1:sgpr_1024 = S_MOV_B32 0
+ %7.sub2:sgpr_1024 = S_MOV_B32 0
+ %7.sub3:sgpr_1024 = S_MOV_B32 0
+ %7.sub4:sgpr_1024 = S_MOV_B32 0
+ %7.sub5:sgpr_1024 = S_MOV_B32 0
+ %7.sub6:sgpr_1024 = S_MOV_B32 0
+ %7.sub7:sgpr_1024 = S_MOV_B32 0
+ %7.sub8:sgpr_1024 = S_MOV_B32 0
+ %7.sub9:sgpr_1024 = S_MOV_B32 0
+ %7.sub10:sgpr_1024 = S_MOV_B32 0
+ %7.sub11:sgpr_1024 = S_MOV_B32 0
+ %7.sub12:sgpr_1024 = S_MOV_B32 0
+ %7.sub13:sgpr_1024 = S_MOV_B32 0
+ %7.sub14:sgpr_1024 = S_MOV_B32 0
+ %7.sub15:sgpr_1024 = S_MOV_B32 0
+ %7.sub16:sgpr_1024 = S_MOV_B32 0
+ %7.sub17:sgpr_1024 = S_MOV_B32 0
+ %7.sub18:sgpr_1024 = S_MOV_B32 0
+ %7.sub19:sgpr_1024 = S_MOV_B32 0
+ %7.sub20:sgpr_1024 = S_MOV_B32 0
+ %8:sreg_64 = IMPLICIT_DEF
+ ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ dead $sgpr30_sgpr31 = SI_CALL %8, 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ $sgpr4_sgpr5 = COPY %5
+ $sgpr6_sgpr7 = COPY %4
+ $sgpr8_sgpr9 = COPY %0
+ $sgpr10_sgpr11 = COPY %3
+ $sgpr12 = COPY %2
+ $sgpr13 = COPY %1
+ dead $sgpr30_sgpr31 = SI_CALL %8, 0, csr_amdgpu_highregs, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ %9:sreg_64 = COPY $exec, implicit-def $exec
+ %10:sreg_64 = IMPLICIT_DEF
+
+ bb.1:
+ successors: %bb.2, %bb.4
+
+ %11:sreg_64 = COPY $exec, implicit-def $exec
+ S_CBRANCH_EXECZ %bb.4, implicit $exec
+
+ bb.2:
+ %12:vreg_1024 = COPY %7
+ %13:sreg_32 = S_LSHL_B32 %6.sub1, 1, implicit-def dead $scc
+ %12:vreg_1024 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32 %12, 0, %13, 3, implicit-def $m0, implicit $m0, implicit $exec
+
+ bb.3:
+ %14:sreg_64 = S_OR_SAVEEXEC_B64 %9, implicit-def $exec, implicit-def $scc, implicit $exec
+ undef %15.sub0:sgpr_1024 = COPY %7.sub0
+ %15.sub1:sgpr_1024 = COPY %7.sub0
+ %15.sub2:sgpr_1024 = COPY %7.sub0
+ %15.sub3:sgpr_1024 = COPY %7.sub0
+ %15.sub4:sgpr_1024 = COPY %7.sub0
+ %15.sub5:sgpr_1024 = COPY %7.sub0
+ %15.sub6:sgpr_1024 = COPY %7.sub0
+ %15.sub7:sgpr_1024 = COPY %7.sub0
+ %15.sub8:sgpr_1024 = COPY %7.sub0
+ %15.sub9:sgpr_1024 = COPY %7.sub0
+ %15.sub10:sgpr_1024 = COPY %7.sub0
+ %15.sub11:sgpr_1024 = COPY %7.sub0
+ %15.sub12:sgpr_1024 = COPY %7.sub0
+ %15.sub13:sgpr_1024 = COPY %7.sub0
+ %15.sub14:sgpr_1024 = COPY %7.sub0
+ %15.sub15:sgpr_1024 = COPY %7.sub0
+ %15.sub16:sgpr_1024 = COPY %7.sub0
+ %15.sub17:sgpr_1024 = COPY %7.sub0
+ %15.sub18:sgpr_1024 = COPY %7.sub0
+ %15.sub19:sgpr_1024 = COPY %7.sub0
+ %15.sub20:sgpr_1024 = COPY %7.sub0
+ %15.sub21:sgpr_1024 = COPY %7.sub0
+ ; Spill code ends up getting inserted here, and we end up with many unspillable sgpr1024 ranges
+ %16:vreg_1024 = COPY %15, implicit $exec
+ $exec = S_XOR_B64_term $exec, %14, implicit-def $scc
+ S_CBRANCH_EXECZ %bb.5, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.4:
+ $exec = S_OR_B64 $exec, %11, implicit-def $scc
+ %17:sreg_32 = S_LSHL_B32 %6.sub0, 1, implicit-def dead $scc
+ %16:vreg_1024 = COPY %7
+
+ bb.5:
+
+...
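For anyone wanting to reproduce the failure locally, the RUN line above expands to roughly the following (a sketch, assuming a built tree with llc, not, and FileCheck on PATH; the ",0" pass-instance index selects the first of the greedy runs, which at the time handled the SGPR allocation separately from VGPRs):

  not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 \
    -start-before=greedy,0 -stop-after=greedy,0 -o - 2>&1 \
    llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir | \
    FileCheck llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir

Equivalently, the test can be run through lit (the build directory path is an assumption about your local setup):

  build/bin/llvm-lit -v llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir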