[llvm] [AMDGPU][LIT] Added a MIR LIT showing the SGPR spills (PR #94584)
Vikash Gupta via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 9 23:01:51 PDT 2024
https://github.com/vg0204 updated https://github.com/llvm/llvm-project/pull/94584
>From f867e27001d04c4c021b38aa8977de6eb1e600c4 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Thu, 6 Jun 2024 12:36:24 +0530
Subject: [PATCH 1/3] [AMDGPU][LIT] Added a MIR LIT showing the SGPR spills
This MIR test case is added to seek the consumption of VGPR lanes
being used for SGPR spills during si-lower-sgpr-spills pass of
AMDGPU pass pipeline. Basically, in this pass, stack slots are
mapped to available VGPR lanes for spilling purpose, thus ending
the need for stack slots.
In current scenarion, each new SGPR spill goes into new VGPR lanes
as, being mapped from its distinct stack slots assigned during SGPR
allocation pass. It can be clearly seen in the added test case.
---
.../si-lower-sgpr-spills-vgpr-lanes-usage.mir | 231 ++++++++++++++++++
1 file changed, 231 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
new file mode 100644
index 0000000000000..a97d965d7e5ba
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
@@ -0,0 +1,231 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -stress-regalloc=3 -start-before=greedy -stop-after=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILLED %s
+
+# This file contains 3 test cases to observe the optimal stack slot usage for SGPR spills utilizing Stack Slot Coloring pass.
+# @stack-slot-share-equal-sized-spills : In this, the stack slot indices is shared among the spill stack objects of equal size.
+# @stack-slot-share-unequal-sized-spills-with-large-spill-first AND
+# @stack-slot-share-unequal-sized-spills-with-small-spill-first :
+# In the remaining 2 test cases mentioned in just above 2 lines, the stack slot indices is shared among the spill stack objects of
+# unequal size, with spill slot having the size of the largest of the stack objects sharing the common stack indices.
+
+--- |
+ define void @stack-slot-share-equal-sized-spills(ptr addrspace(1) nocapture readnone %arg, ptr addrspace(1) noalias %arg1) {
+ bb:
+ %tmp = load i32, ptr addrspace(1) null, align 4
+ call void @func(i32 undef)
+ call void @func(i32 %tmp)
+ unreachable
+ }
+
+ define void @stack-slot-share-unequal-sized-spills-with-large-spill-first(ptr addrspace(1) nocapture readnone %arg, ptr addrspace(1) noalias %arg1) {
+ bb:
+ %tmp = load i32, ptr addrspace(1) null, align 4
+ call void @func(i32 undef)
+ call void @func(i32 %tmp)
+ unreachable
+ }
+
+ define void @stack-slot-share-unequal-sized-spills-with-small-spill-first(ptr addrspace(1) nocapture readnone %arg, ptr addrspace(1) noalias %arg1) {
+ bb:
+ %tmp = load i32, ptr addrspace(1) null, align 4
+ call void @func(i32 undef)
+ call void @func(i32 %tmp)
+ unreachable
+ }
+
+ declare void @func(i32)
+...
+
+---
+name: stack-slot-share-equal-sized-spills
+tracksRegLiveness: true
+frameInfo:
+ adjustsStack: true
+ hasCalls: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ frameOffsetReg: $sgpr32
+ stackPtrOffsetReg: $sgpr32
+body: |
+ bb.0:
+ ; SGPR_SPILLED-LABEL: name: stack-slot-share-equal-sized-spills
+ ; SGPR_SPILLED: liveins: $sgpr30, $sgpr31, $vgpr62
+ ; SGPR_SPILLED-NEXT: {{ $}}
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr62
+ ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]]
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr0, 1, [[DEF]], implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr1, 2, [[DEF]], implicit $sgpr0_sgpr1
+ ; SGPR_SPILLED-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; SGPR_SPILLED-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr
+ ; SGPR_SPILLED-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 3, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 4, [[DEF]], implicit killed $sgpr4_sgpr5
+ ; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3, implicit-def $sgpr4_sgpr5
+ ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+ ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit undef $vgpr0
+ ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 5, [[DEF]]
+ ; SGPR_SPILLED-NEXT: $sgpr0 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1, implicit-def $sgpr0_sgpr1
+ ; SGPR_SPILLED-NEXT: $sgpr1 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 6, [[DEF]], implicit-def $sgpr2_sgpr3, implicit $sgpr2_sgpr3
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 7, [[DEF]], implicit $sgpr2_sgpr3
+ ; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ ; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ ; SGPR_SPILLED-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+ ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3, implicit-def $sgpr4_sgpr5
+ ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+ ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit killed $vgpr0
+ ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5
+ ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6, implicit-def $sgpr2_sgpr3
+ ; SGPR_SPILLED-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 7
+ ; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ %0:sreg_32_xm0 = COPY $sgpr32
+ %5:sreg_64 = COPY $sgpr0_sgpr1
+ %1:vreg_64 = IMPLICIT_DEF
+ %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr
+ %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+ ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit undef $vgpr0
+ $sgpr32 = COPY %0
+ %4:sreg_32_xm0 = COPY $sgpr32
+ $sgpr0_sgpr1 = COPY %5
+ %6:sreg_64 = COPY $sgpr2_sgpr3
+ ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ $vgpr0 = COPY %2
+ dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit killed $vgpr0
+ $sgpr32 = COPY %4
+ $sgpr2_sgpr3 = COPY %6
+ ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+...
+
+---
+name: stack-slot-share-unequal-sized-spills-with-large-spill-first
+tracksRegLiveness: true
+frameInfo:
+ adjustsStack: true
+ hasCalls: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ frameOffsetReg: $sgpr32
+ stackPtrOffsetReg: $sgpr32
+body: |
+ bb.0:
+ ; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-large-spill-first
+ ; SGPR_SPILLED: liveins: $sgpr30, $sgpr31, $vgpr62
+ ; SGPR_SPILLED-NEXT: {{ $}}
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr62
+ ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]]
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 1, [[DEF]], implicit-def $sgpr2_sgpr3, implicit $sgpr2_sgpr3
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 2, [[DEF]], implicit $sgpr2_sgpr3
+ ; SGPR_SPILLED-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; SGPR_SPILLED-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr
+ ; SGPR_SPILLED-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 3, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 4, [[DEF]], implicit killed $sgpr4_sgpr5
+ ; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3, implicit-def $sgpr4_sgpr5
+ ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+ ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit undef $vgpr0
+ ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 5, [[DEF]]
+ ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1, implicit-def $sgpr2_sgpr3
+ ; SGPR_SPILLED-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 6, [[DEF]]
+ ; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ ; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ ; SGPR_SPILLED-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+ ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3, implicit-def $sgpr4_sgpr5
+ ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+ ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit killed $vgpr0
+ ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5
+ ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
+ ; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ %0:sreg_32_xm0 = COPY $sgpr32
+ %5:sreg_64 = COPY $sgpr2_sgpr3
+ %1:vreg_64 = IMPLICIT_DEF
+ %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr
+ %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+ ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit undef $vgpr0
+ $sgpr32 = COPY %0
+ %4:sreg_32_xm0 = COPY $sgpr32
+ $sgpr2_sgpr3 = COPY %5
+ %6:sreg_32 = COPY $sgpr2
+ ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ $vgpr0 = COPY %2
+ dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit killed $vgpr0
+ $sgpr32 = COPY %4
+ $sgpr2 = COPY %6
+ ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+...
+
+---
+name: stack-slot-share-unequal-sized-spills-with-small-spill-first
+tracksRegLiveness: true
+frameInfo:
+ adjustsStack: true
+ hasCalls: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ frameOffsetReg: $sgpr32
+ stackPtrOffsetReg: $sgpr32
+body: |
+ bb.0:
+ ; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-small-spill-first
+ ; SGPR_SPILLED: liveins: $sgpr30, $sgpr31, $vgpr62
+ ; SGPR_SPILLED-NEXT: {{ $}}
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr62
+ ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]]
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr0, 1, [[DEF]]
+ ; SGPR_SPILLED-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; SGPR_SPILLED-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr
+ ; SGPR_SPILLED-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 2, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 3, [[DEF]], implicit killed $sgpr4_sgpr5
+ ; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2, implicit-def $sgpr4_sgpr5
+ ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3
+ ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit undef $vgpr0
+ ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 4, [[DEF]]
+ ; SGPR_SPILLED-NEXT: $sgpr0 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 5, [[DEF]], implicit-def $sgpr2_sgpr3, implicit $sgpr2_sgpr3
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 6, [[DEF]], implicit $sgpr2_sgpr3
+ ; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ ; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ ; SGPR_SPILLED-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+ ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2, implicit-def $sgpr4_sgpr5
+ ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3
+ ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit killed $vgpr0
+ ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+ ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5, implicit-def $sgpr2_sgpr3
+ ; SGPR_SPILLED-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
+ ; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ %0:sreg_32_xm0 = COPY $sgpr32
+ %5:sreg_32 = COPY $sgpr0
+ %1:vreg_64 = IMPLICIT_DEF
+ %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr
+ %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+ ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit undef $vgpr0
+ $sgpr32 = COPY %0
+ %4:sreg_32_xm0 = COPY $sgpr32
+ $sgpr0 = COPY %5
+ %6:sreg_64 = COPY $sgpr2_sgpr3
+ ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+ $vgpr0 = COPY %2
+ dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit killed $vgpr0
+ $sgpr32 = COPY %4
+ $sgpr2_sgpr3 = COPY %6
+ ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+...
>From 151d5b3cfd21e9c7a46697f0c79d7f0ba40bc5f8 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Fri, 7 Jun 2024 14:46:35 +0530
Subject: [PATCH 2/3] Modified the LIT test to eliminate dependency on RA
behaviour.
---
.../si-lower-sgpr-spills-vgpr-lanes-usage.mir | 275 ++++++++++++------
1 file changed, 184 insertions(+), 91 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
index a97d965d7e5ba..13df1672fb621 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -stress-regalloc=3 -start-before=greedy -stop-after=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILLED %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILLED %s
# This file contains 3 test cases to observe the optimal stack slot usage for SGPR spills utilizing Stack Slot Coloring pass.
# @stack-slot-share-equal-sized-spills : In this, the stack slot indices is shared among the spill stack objects of equal size.
@@ -8,44 +8,53 @@
# In the remaining 2 test cases mentioned in just above 2 lines, the stack slot indices is shared among the spill stack objects of
# unequal size, with spill slot having the size of the largest of the stack objects sharing the common stack indices.
---- |
- define void @stack-slot-share-equal-sized-spills(ptr addrspace(1) nocapture readnone %arg, ptr addrspace(1) noalias %arg1) {
- bb:
- %tmp = load i32, ptr addrspace(1) null, align 4
- call void @func(i32 undef)
- call void @func(i32 %tmp)
- unreachable
- }
-
- define void @stack-slot-share-unequal-sized-spills-with-large-spill-first(ptr addrspace(1) nocapture readnone %arg, ptr addrspace(1) noalias %arg1) {
- bb:
- %tmp = load i32, ptr addrspace(1) null, align 4
- call void @func(i32 undef)
- call void @func(i32 %tmp)
- unreachable
- }
-
- define void @stack-slot-share-unequal-sized-spills-with-small-spill-first(ptr addrspace(1) nocapture readnone %arg, ptr addrspace(1) noalias %arg1) {
- bb:
- %tmp = load i32, ptr addrspace(1) null, align 4
- call void @func(i32 undef)
- call void @func(i32 %tmp)
- unreachable
- }
-
- declare void @func(i32)
-...
-
---
name: stack-slot-share-equal-sized-spills
+alignment: 1
tracksRegLiveness: true
+tracksDebugUserValues: true
+registers:
+ - { id: 0, class: sreg_32_xm0_xexec }
+ - { id: 1, class: sreg_64_xexec }
+ - { id: 2, class: vreg_64 }
+ - { id: 3, class: vgpr_32, preferred-register: '$vgpr0' }
+ - { id: 4, class: sreg_64 }
+ - { id: 5, class: sreg_32_xm0_xexec }
+ - { id: 6, class: sreg_64_xexec }
+ - { id: 7, class: sreg_64 }
+ - { id: 8, class: sreg_64 }
+ - { id: 9, class: sreg_64 }
frameInfo:
+ maxAlignment: 4
adjustsStack: true
hasCalls: true
+stack:
+ - { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
+ - { id: 1, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
+ - { id: 2, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+ - { id: 3, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+ - { id: 4, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
machineFunctionInfo:
- scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
- frameOffsetReg: $sgpr32
- stackPtrOffsetReg: $sgpr32
+ maxKernArgAlign: 1
+ hasSpilledSGPRs: true
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr32'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ dispatchPtr: { reg: '$sgpr4_sgpr5' }
+ queuePtr: { reg: '$sgpr6_sgpr7' }
+ dispatchID: { reg: '$sgpr10_sgpr11' }
+ workGroupIDX: { reg: '$sgpr12' }
+ workGroupIDY: { reg: '$sgpr13' }
+ workGroupIDZ: { reg: '$sgpr14' }
+ LDSKernelId: { reg: '$sgpr15' }
+ implicitArgPtr: { reg: '$sgpr8_sgpr9' }
+ workItemIDX: { reg: '$vgpr31', mask: 1023 }
+ workItemIDY: { reg: '$vgpr31', mask: 1047552 }
+ workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
+ occupancy: 8
+ vgprForAGPRCopy: '$vgpr63'
body: |
bb.0:
; SGPR_SPILLED-LABEL: name: stack-slot-share-equal-sized-spills
@@ -59,13 +68,13 @@ body: |
; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr1, 2, [[DEF]], implicit $sgpr0_sgpr1
; SGPR_SPILLED-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; SGPR_SPILLED-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr
- ; SGPR_SPILLED-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+ ; SGPR_SPILLED-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 3, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 4, [[DEF]], implicit killed $sgpr4_sgpr5
; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3, implicit-def $sgpr4_sgpr5
; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
- ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit undef $vgpr0
+ ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 5, [[DEF]]
; SGPR_SPILLED-NEXT: $sgpr0 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1, implicit-def $sgpr0_sgpr1
@@ -77,41 +86,81 @@ body: |
; SGPR_SPILLED-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3, implicit-def $sgpr4_sgpr5
; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
- ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit killed $vgpr0
+ ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5
; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6, implicit-def $sgpr2_sgpr3
; SGPR_SPILLED-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 7
; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
- %0:sreg_32_xm0 = COPY $sgpr32
- %5:sreg_64 = COPY $sgpr0_sgpr1
- %1:vreg_64 = IMPLICIT_DEF
- %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr
- %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+ SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.2, addrspace 5)
+ SI_SPILL_S64_SAVE $sgpr0_sgpr1, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.1, align 4, addrspace 5)
+ %2:vreg_64 = IMPLICIT_DEF
+ %3:vgpr_32 = FLAT_LOAD_DWORD %2, 0, 0, implicit $exec, implicit $flat_scr
+ renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
+ SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
- dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit undef $vgpr0
- $sgpr32 = COPY %0
- %4:sreg_32_xm0 = COPY $sgpr32
- $sgpr0_sgpr1 = COPY %5
- %6:sreg_64 = COPY $sgpr2_sgpr3
+ renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
+ dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
+ $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5)
+ SI_SPILL_S32_SAVE $sgpr32, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.3, addrspace 5)
+ $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5)
+ SI_SPILL_S64_SAVE $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5)
ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
- $vgpr0 = COPY %2
- dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit killed $vgpr0
- $sgpr32 = COPY %4
- $sgpr2_sgpr3 = COPY %6
+ $vgpr0 = COPY %3
+ renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
+ dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
+ $sgpr32 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.3, addrspace 5)
+ $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5)
ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-...
+...
---
name: stack-slot-share-unequal-sized-spills-with-large-spill-first
+alignment: 1
tracksRegLiveness: true
+tracksDebugUserValues: true
+registers:
+ - { id: 0, class: sreg_32_xm0_xexec }
+ - { id: 1, class: sreg_64_xexec }
+ - { id: 2, class: vreg_64 }
+ - { id: 3, class: vgpr_32, preferred-register: '$vgpr0' }
+ - { id: 4, class: sreg_64 }
+ - { id: 5, class: sreg_32_xm0_xexec }
+ - { id: 6, class: sreg_32_xm0_xexec }
+ - { id: 7, class: sreg_64 }
+ - { id: 8, class: sreg_64 }
+ - { id: 9, class: sreg_64 }
frameInfo:
+ maxAlignment: 4
adjustsStack: true
hasCalls: true
+stack:
+ - { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
+ - { id: 1, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
+ - { id: 2, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+ - { id: 3, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+ - { id: 4, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
machineFunctionInfo:
- scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
- frameOffsetReg: $sgpr32
- stackPtrOffsetReg: $sgpr32
+ maxKernArgAlign: 1
+ hasSpilledSGPRs: true
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr32'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ dispatchPtr: { reg: '$sgpr4_sgpr5' }
+ queuePtr: { reg: '$sgpr6_sgpr7' }
+ dispatchID: { reg: '$sgpr10_sgpr11' }
+ workGroupIDX: { reg: '$sgpr12' }
+ workGroupIDY: { reg: '$sgpr13' }
+ workGroupIDZ: { reg: '$sgpr14' }
+ LDSKernelId: { reg: '$sgpr15' }
+ implicitArgPtr: { reg: '$sgpr8_sgpr9' }
+ workItemIDX: { reg: '$vgpr31', mask: 1023 }
+ workItemIDY: { reg: '$vgpr31', mask: 1047552 }
+ workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
+ occupancy: 8
+ vgprForAGPRCopy: '$vgpr63'
body: |
bb.0:
; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-large-spill-first
@@ -125,13 +174,13 @@ body: |
; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 2, [[DEF]], implicit $sgpr2_sgpr3
; SGPR_SPILLED-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; SGPR_SPILLED-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr
- ; SGPR_SPILLED-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+ ; SGPR_SPILLED-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 3, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 4, [[DEF]], implicit killed $sgpr4_sgpr5
; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3, implicit-def $sgpr4_sgpr5
; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
- ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit undef $vgpr0
+ ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 5, [[DEF]]
; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1, implicit-def $sgpr2_sgpr3
@@ -142,40 +191,80 @@ body: |
; SGPR_SPILLED-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3, implicit-def $sgpr4_sgpr5
; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
- ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit killed $vgpr0
+ ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5
; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
- %0:sreg_32_xm0 = COPY $sgpr32
- %5:sreg_64 = COPY $sgpr2_sgpr3
- %1:vreg_64 = IMPLICIT_DEF
- %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr
- %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+ SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.2, addrspace 5)
+ SI_SPILL_S64_SAVE $sgpr2_sgpr3, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.1, align 4, addrspace 5)
+ %2:vreg_64 = IMPLICIT_DEF
+ %3:vgpr_32 = FLAT_LOAD_DWORD %2, 0, 0, implicit $exec, implicit $flat_scr
+ renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
+ SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
- dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit undef $vgpr0
- $sgpr32 = COPY %0
- %4:sreg_32_xm0 = COPY $sgpr32
- $sgpr2_sgpr3 = COPY %5
- %6:sreg_32 = COPY $sgpr2
+ renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
+ dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
+ $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5)
+ SI_SPILL_S32_SAVE $sgpr32, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.3, addrspace 5)
+ $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5)
+ SI_SPILL_S32_SAVE $sgpr2, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.4, addrspace 5)
ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
- $vgpr0 = COPY %2
- dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit killed $vgpr0
- $sgpr32 = COPY %4
- $sgpr2 = COPY %6
+ $vgpr0 = COPY %3
+ renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
+ dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
+ $sgpr32 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.3, addrspace 5)
+ $sgpr2 = SI_SPILL_S32_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.4, addrspace 5)
ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-...
+...
---
name: stack-slot-share-unequal-sized-spills-with-small-spill-first
+alignment: 1
tracksRegLiveness: true
+tracksDebugUserValues: true
+registers:
+ - { id: 0, class: sreg_32_xm0_xexec }
+ - { id: 1, class: sreg_32_xm0_xexec }
+ - { id: 2, class: vreg_64 }
+ - { id: 3, class: vgpr_32, preferred-register: '$vgpr0' }
+ - { id: 4, class: sreg_64 }
+ - { id: 5, class: sreg_32_xm0_xexec }
+ - { id: 6, class: sreg_64_xexec }
+ - { id: 7, class: sreg_64 }
+ - { id: 8, class: sreg_64 }
+ - { id: 9, class: sreg_64 }
frameInfo:
+ maxAlignment: 4
adjustsStack: true
hasCalls: true
+stack:
+ - { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
+ - { id: 1, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+ - { id: 2, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+ - { id: 3, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+ - { id: 4, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
machineFunctionInfo:
- scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
- frameOffsetReg: $sgpr32
- stackPtrOffsetReg: $sgpr32
+ maxKernArgAlign: 1
+ hasSpilledSGPRs: true
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr32'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ dispatchPtr: { reg: '$sgpr4_sgpr5' }
+ queuePtr: { reg: '$sgpr6_sgpr7' }
+ dispatchID: { reg: '$sgpr10_sgpr11' }
+ workGroupIDX: { reg: '$sgpr12' }
+ workGroupIDY: { reg: '$sgpr13' }
+ workGroupIDZ: { reg: '$sgpr14' }
+ LDSKernelId: { reg: '$sgpr15' }
+ implicitArgPtr: { reg: '$sgpr8_sgpr9' }
+ workItemIDX: { reg: '$vgpr31', mask: 1023 }
+ workItemIDY: { reg: '$vgpr31', mask: 1047552 }
+ workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
+ occupancy: 8
+ vgprForAGPRCopy: '$vgpr63'
body: |
bb.0:
; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-small-spill-first
@@ -188,13 +277,13 @@ body: |
; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr0, 1, [[DEF]]
; SGPR_SPILLED-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; SGPR_SPILLED-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr
- ; SGPR_SPILLED-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+ ; SGPR_SPILLED-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 2, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 3, [[DEF]], implicit killed $sgpr4_sgpr5
; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2, implicit-def $sgpr4_sgpr5
; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3
- ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit undef $vgpr0
+ ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 4, [[DEF]]
; SGPR_SPILLED-NEXT: $sgpr0 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1
@@ -205,27 +294,31 @@ body: |
; SGPR_SPILLED-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2, implicit-def $sgpr4_sgpr5
; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3
- ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit killed $vgpr0
+ ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5, implicit-def $sgpr2_sgpr3
; SGPR_SPILLED-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
- %0:sreg_32_xm0 = COPY $sgpr32
- %5:sreg_32 = COPY $sgpr0
- %1:vreg_64 = IMPLICIT_DEF
- %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr
- %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+ SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.2, addrspace 5)
+ SI_SPILL_S32_SAVE $sgpr0, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.1, addrspace 5)
+ %2:vreg_64 = IMPLICIT_DEF
+ %3:vgpr_32 = FLAT_LOAD_DWORD %2, 0, 0, implicit $exec, implicit $flat_scr
+ renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
+ SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
- dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit undef $vgpr0
- $sgpr32 = COPY %0
- %4:sreg_32_xm0 = COPY $sgpr32
- $sgpr0 = COPY %5
- %6:sreg_64 = COPY $sgpr2_sgpr3
+ renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
+ dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
+ $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5)
+ SI_SPILL_S32_SAVE $sgpr32, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.3, addrspace 5)
+ $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5)
+ SI_SPILL_S64_SAVE $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5)
ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
- $vgpr0 = COPY %2
- dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit killed $vgpr0
- $sgpr32 = COPY %4
- $sgpr2_sgpr3 = COPY %6
+ $vgpr0 = COPY %3
+ renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
+ dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
+ $sgpr32 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.3, addrspace 5)
+ $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5)
ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+
...
>From 289d8ca20bb5b838e24d82e8f16d93756aa163a4 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Fri, 7 Jun 2024 15:07:38 +0530
Subject: [PATCH 3/3] Removed unnecessary machineInfo from MIR LIT test.
---
.../si-lower-sgpr-spills-vgpr-lanes-usage.mir | 93 -------------------
1 file changed, 93 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
index 13df1672fb621..80b7b335584fb 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
@@ -10,24 +10,9 @@
---
name: stack-slot-share-equal-sized-spills
-alignment: 1
tracksRegLiveness: true
-tracksDebugUserValues: true
-registers:
- - { id: 0, class: sreg_32_xm0_xexec }
- - { id: 1, class: sreg_64_xexec }
- - { id: 2, class: vreg_64 }
- - { id: 3, class: vgpr_32, preferred-register: '$vgpr0' }
- - { id: 4, class: sreg_64 }
- - { id: 5, class: sreg_32_xm0_xexec }
- - { id: 6, class: sreg_64_xexec }
- - { id: 7, class: sreg_64 }
- - { id: 8, class: sreg_64 }
- - { id: 9, class: sreg_64 }
frameInfo:
- maxAlignment: 4
adjustsStack: true
- hasCalls: true
stack:
- { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
- { id: 1, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
@@ -35,26 +20,10 @@ stack:
- { id: 3, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
- { id: 4, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
machineFunctionInfo:
- maxKernArgAlign: 1
hasSpilledSGPRs: true
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
frameOffsetReg: '$sgpr32'
stackPtrOffsetReg: '$sgpr32'
- argumentInfo:
- privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
- dispatchPtr: { reg: '$sgpr4_sgpr5' }
- queuePtr: { reg: '$sgpr6_sgpr7' }
- dispatchID: { reg: '$sgpr10_sgpr11' }
- workGroupIDX: { reg: '$sgpr12' }
- workGroupIDY: { reg: '$sgpr13' }
- workGroupIDZ: { reg: '$sgpr14' }
- LDSKernelId: { reg: '$sgpr15' }
- implicitArgPtr: { reg: '$sgpr8_sgpr9' }
- workItemIDX: { reg: '$vgpr31', mask: 1023 }
- workItemIDY: { reg: '$vgpr31', mask: 1047552 }
- workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
- occupancy: 8
- vgprForAGPRCopy: '$vgpr63'
body: |
bb.0:
; SGPR_SPILLED-LABEL: name: stack-slot-share-equal-sized-spills
@@ -116,24 +85,9 @@ body: |
...
---
name: stack-slot-share-unequal-sized-spills-with-large-spill-first
-alignment: 1
tracksRegLiveness: true
-tracksDebugUserValues: true
-registers:
- - { id: 0, class: sreg_32_xm0_xexec }
- - { id: 1, class: sreg_64_xexec }
- - { id: 2, class: vreg_64 }
- - { id: 3, class: vgpr_32, preferred-register: '$vgpr0' }
- - { id: 4, class: sreg_64 }
- - { id: 5, class: sreg_32_xm0_xexec }
- - { id: 6, class: sreg_32_xm0_xexec }
- - { id: 7, class: sreg_64 }
- - { id: 8, class: sreg_64 }
- - { id: 9, class: sreg_64 }
frameInfo:
- maxAlignment: 4
adjustsStack: true
- hasCalls: true
stack:
- { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
- { id: 1, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
@@ -141,26 +95,10 @@ stack:
- { id: 3, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
- { id: 4, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
machineFunctionInfo:
- maxKernArgAlign: 1
hasSpilledSGPRs: true
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
frameOffsetReg: '$sgpr32'
stackPtrOffsetReg: '$sgpr32'
- argumentInfo:
- privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
- dispatchPtr: { reg: '$sgpr4_sgpr5' }
- queuePtr: { reg: '$sgpr6_sgpr7' }
- dispatchID: { reg: '$sgpr10_sgpr11' }
- workGroupIDX: { reg: '$sgpr12' }
- workGroupIDY: { reg: '$sgpr13' }
- workGroupIDZ: { reg: '$sgpr14' }
- LDSKernelId: { reg: '$sgpr15' }
- implicitArgPtr: { reg: '$sgpr8_sgpr9' }
- workItemIDX: { reg: '$vgpr31', mask: 1023 }
- workItemIDY: { reg: '$vgpr31', mask: 1047552 }
- workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
- occupancy: 8
- vgprForAGPRCopy: '$vgpr63'
body: |
bb.0:
; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-large-spill-first
@@ -220,24 +158,9 @@ body: |
...
---
name: stack-slot-share-unequal-sized-spills-with-small-spill-first
-alignment: 1
tracksRegLiveness: true
-tracksDebugUserValues: true
-registers:
- - { id: 0, class: sreg_32_xm0_xexec }
- - { id: 1, class: sreg_32_xm0_xexec }
- - { id: 2, class: vreg_64 }
- - { id: 3, class: vgpr_32, preferred-register: '$vgpr0' }
- - { id: 4, class: sreg_64 }
- - { id: 5, class: sreg_32_xm0_xexec }
- - { id: 6, class: sreg_64_xexec }
- - { id: 7, class: sreg_64 }
- - { id: 8, class: sreg_64 }
- - { id: 9, class: sreg_64 }
frameInfo:
- maxAlignment: 4
adjustsStack: true
- hasCalls: true
stack:
- { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
- { id: 1, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
@@ -245,26 +168,10 @@ stack:
- { id: 3, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
- { id: 4, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
machineFunctionInfo:
- maxKernArgAlign: 1
hasSpilledSGPRs: true
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
frameOffsetReg: '$sgpr32'
stackPtrOffsetReg: '$sgpr32'
- argumentInfo:
- privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
- dispatchPtr: { reg: '$sgpr4_sgpr5' }
- queuePtr: { reg: '$sgpr6_sgpr7' }
- dispatchID: { reg: '$sgpr10_sgpr11' }
- workGroupIDX: { reg: '$sgpr12' }
- workGroupIDY: { reg: '$sgpr13' }
- workGroupIDZ: { reg: '$sgpr14' }
- LDSKernelId: { reg: '$sgpr15' }
- implicitArgPtr: { reg: '$sgpr8_sgpr9' }
- workItemIDX: { reg: '$vgpr31', mask: 1023 }
- workItemIDY: { reg: '$vgpr31', mask: 1047552 }
- workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
- occupancy: 8
- vgprForAGPRCopy: '$vgpr63'
body: |
bb.0:
; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-small-spill-first
More information about the llvm-commits
mailing list