[llvm] [AMDGPU][LIT] Added a MIR LIT showing the SGPR spills (PR #94584)

Vikash Gupta via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 17 00:33:38 PDT 2024


https://github.com/vg0204 updated https://github.com/llvm/llvm-project/pull/94584

>From ad5d50f83c7bc3fc29ce2995c0c8c0a45bfc2ff6 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Thu, 6 Jun 2024 12:36:24 +0530
Subject: [PATCH 1/6] [AMDGPU][LIT] Added a MIR LIT showing the SGPR spills

This MIR test case is added to show the consumption of VGPR lanes
used for SGPR spills during the si-lower-sgpr-spills pass of the
AMDGPU pass pipeline. In this pass, stack slots are mapped to
available VGPR lanes for spilling, thus eliminating the need for
those stack slots.

In the current scenario, each new SGPR spill goes into new VGPR lanes,
because each spill is mapped from the distinct stack slot assigned to
it during the SGPR allocation pass. This can be clearly seen in the
added test case.
---
 .../si-lower-sgpr-spills-vgpr-lanes-usage.mir | 231 ++++++++++++++++++
 1 file changed, 231 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir

diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
new file mode 100644
index 0000000000000..a97d965d7e5ba
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
@@ -0,0 +1,231 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -stress-regalloc=3 -start-before=greedy -stop-after=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILLED %s
+
+# This file contains 3 test cases to observe the optimal stack slot usage for SGPR spills utilizing Stack Slot Coloring pass.
+# @stack-slot-share-equal-sized-spills : In this, the stack slot indices is shared among the spill stack objects of equal size.
+# @stack-slot-share-unequal-sized-spills-with-large-spill-first AND
+# @stack-slot-share-unequal-sized-spills-with-small-spill-first :
+# In the remaining 2 test cases mentioned in just above 2 lines, the stack slot indices is shared among the spill stack objects of
+# unequal size, with spill slot having the size of the largest of the stack objects sharing the common stack indices.
+
+--- |
+  define void @stack-slot-share-equal-sized-spills(ptr addrspace(1) nocapture readnone %arg, ptr addrspace(1) noalias %arg1) {
+  bb:
+    %tmp = load i32, ptr addrspace(1) null, align 4
+    call void @func(i32 undef)
+    call void @func(i32 %tmp)
+    unreachable
+  }
+
+  define void @stack-slot-share-unequal-sized-spills-with-large-spill-first(ptr addrspace(1) nocapture readnone %arg, ptr addrspace(1) noalias %arg1) {
+  bb:
+    %tmp = load i32, ptr addrspace(1) null, align 4
+    call void @func(i32 undef)
+    call void @func(i32 %tmp)
+    unreachable
+  }
+
+  define void @stack-slot-share-unequal-sized-spills-with-small-spill-first(ptr addrspace(1) nocapture readnone %arg, ptr addrspace(1) noalias %arg1) {
+  bb:
+    %tmp = load i32, ptr addrspace(1) null, align 4
+    call void @func(i32 undef)
+    call void @func(i32 %tmp)
+    unreachable
+  }
+
+  declare void @func(i32)
+...
+
+---
+name:            stack-slot-share-equal-sized-spills
+tracksRegLiveness: true
+frameInfo:
+  adjustsStack:    true
+  hasCalls:        true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  frameOffsetReg: $sgpr32
+  stackPtrOffsetReg: $sgpr32
+body:             |
+  bb.0:
+    ; SGPR_SPILLED-LABEL: name: stack-slot-share-equal-sized-spills
+    ; SGPR_SPILLED: liveins: $sgpr30, $sgpr31, $vgpr62
+    ; SGPR_SPILLED-NEXT: {{  $}}
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr62
+    ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]]
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr0, 1, [[DEF]], implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr1, 2, [[DEF]], implicit $sgpr0_sgpr1
+    ; SGPR_SPILLED-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+    ; SGPR_SPILLED-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr
+    ; SGPR_SPILLED-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 3, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 4, [[DEF]], implicit killed $sgpr4_sgpr5
+    ; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3, implicit-def $sgpr4_sgpr5
+    ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+    ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit undef $vgpr0
+    ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 5, [[DEF]]
+    ; SGPR_SPILLED-NEXT: $sgpr0 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1, implicit-def $sgpr0_sgpr1
+    ; SGPR_SPILLED-NEXT: $sgpr1 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 6, [[DEF]], implicit-def $sgpr2_sgpr3, implicit $sgpr2_sgpr3
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 7, [[DEF]], implicit $sgpr2_sgpr3
+    ; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    ; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    ; SGPR_SPILLED-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+    ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3, implicit-def $sgpr4_sgpr5
+    ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+    ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit killed $vgpr0
+    ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5
+    ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6, implicit-def $sgpr2_sgpr3
+    ; SGPR_SPILLED-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 7
+    ; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    %0:sreg_32_xm0 = COPY $sgpr32
+    %5:sreg_64 = COPY $sgpr0_sgpr1
+    %1:vreg_64 = IMPLICIT_DEF
+    %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr
+    %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+    ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit undef $vgpr0
+    $sgpr32 = COPY %0
+    %4:sreg_32_xm0 = COPY $sgpr32
+    $sgpr0_sgpr1 = COPY %5
+    %6:sreg_64 = COPY $sgpr2_sgpr3
+    ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    $vgpr0 = COPY %2
+    dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit killed $vgpr0
+    $sgpr32 = COPY %4
+    $sgpr2_sgpr3 = COPY %6
+    ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+...
+
+---
+name:            stack-slot-share-unequal-sized-spills-with-large-spill-first
+tracksRegLiveness: true
+frameInfo:
+  adjustsStack:    true
+  hasCalls:        true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  frameOffsetReg: $sgpr32
+  stackPtrOffsetReg: $sgpr32
+body:             |
+  bb.0:
+    ; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-large-spill-first
+    ; SGPR_SPILLED: liveins: $sgpr30, $sgpr31, $vgpr62
+    ; SGPR_SPILLED-NEXT: {{  $}}
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr62
+    ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]]
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 1, [[DEF]], implicit-def $sgpr2_sgpr3, implicit $sgpr2_sgpr3
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 2, [[DEF]], implicit $sgpr2_sgpr3
+    ; SGPR_SPILLED-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+    ; SGPR_SPILLED-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr
+    ; SGPR_SPILLED-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 3, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 4, [[DEF]], implicit killed $sgpr4_sgpr5
+    ; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3, implicit-def $sgpr4_sgpr5
+    ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+    ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit undef $vgpr0
+    ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 5, [[DEF]]
+    ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1, implicit-def $sgpr2_sgpr3
+    ; SGPR_SPILLED-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 6, [[DEF]]
+    ; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    ; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    ; SGPR_SPILLED-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+    ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3, implicit-def $sgpr4_sgpr5
+    ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+    ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit killed $vgpr0
+    ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5
+    ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
+    ; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    %0:sreg_32_xm0 = COPY $sgpr32
+    %5:sreg_64 = COPY $sgpr2_sgpr3
+    %1:vreg_64 = IMPLICIT_DEF
+    %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr
+    %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+    ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit undef $vgpr0
+    $sgpr32 = COPY %0
+    %4:sreg_32_xm0 = COPY $sgpr32
+    $sgpr2_sgpr3 = COPY %5
+    %6:sreg_32 = COPY $sgpr2
+    ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    $vgpr0 = COPY %2
+    dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit killed $vgpr0
+    $sgpr32 = COPY %4
+    $sgpr2 = COPY %6
+    ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+...
+
+---
+name:            stack-slot-share-unequal-sized-spills-with-small-spill-first
+tracksRegLiveness: true
+frameInfo:
+  adjustsStack:    true
+  hasCalls:        true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  frameOffsetReg: $sgpr32
+  stackPtrOffsetReg: $sgpr32
+body:             |
+  bb.0:
+    ; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-small-spill-first
+    ; SGPR_SPILLED: liveins: $sgpr30, $sgpr31, $vgpr62
+    ; SGPR_SPILLED-NEXT: {{  $}}
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr62
+    ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]]
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr0, 1, [[DEF]]
+    ; SGPR_SPILLED-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+    ; SGPR_SPILLED-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr
+    ; SGPR_SPILLED-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 2, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 3, [[DEF]], implicit killed $sgpr4_sgpr5
+    ; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2, implicit-def $sgpr4_sgpr5
+    ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3
+    ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit undef $vgpr0
+    ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 4, [[DEF]]
+    ; SGPR_SPILLED-NEXT: $sgpr0 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 5, [[DEF]], implicit-def $sgpr2_sgpr3, implicit $sgpr2_sgpr3
+    ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 6, [[DEF]], implicit $sgpr2_sgpr3
+    ; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    ; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    ; SGPR_SPILLED-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+    ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2, implicit-def $sgpr4_sgpr5
+    ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3
+    ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit killed $vgpr0
+    ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+    ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5, implicit-def $sgpr2_sgpr3
+    ; SGPR_SPILLED-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
+    ; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    %0:sreg_32_xm0 = COPY $sgpr32
+    %5:sreg_32 = COPY $sgpr0
+    %1:vreg_64 = IMPLICIT_DEF
+    %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr
+    %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+    ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit undef $vgpr0
+    $sgpr32 = COPY %0
+    %4:sreg_32_xm0 = COPY $sgpr32
+    $sgpr0 = COPY %5
+    %6:sreg_64 = COPY $sgpr2_sgpr3
+    ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    $vgpr0 = COPY %2
+    dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit killed $vgpr0
+    $sgpr32 = COPY %4
+    $sgpr2_sgpr3 = COPY %6
+    ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+...

>From 72a510294a75a20ce4b0a67017720177177d576b Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Fri, 7 Jun 2024 14:46:35 +0530
Subject: [PATCH 2/6] Modified the LIT test to eliminate dependency on RA
 behaviour.

---
 .../si-lower-sgpr-spills-vgpr-lanes-usage.mir | 275 ++++++++++++------
 1 file changed, 184 insertions(+), 91 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
index a97d965d7e5ba..13df1672fb621 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -stress-regalloc=3 -start-before=greedy -stop-after=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILLED %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILLED %s
 
 # This file contains 3 test cases to observe the optimal stack slot usage for SGPR spills utilizing Stack Slot Coloring pass.
 # @stack-slot-share-equal-sized-spills : In this, the stack slot indices is shared among the spill stack objects of equal size.
@@ -8,44 +8,53 @@
 # In the remaining 2 test cases mentioned in just above 2 lines, the stack slot indices is shared among the spill stack objects of
 # unequal size, with spill slot having the size of the largest of the stack objects sharing the common stack indices.
 
---- |
-  define void @stack-slot-share-equal-sized-spills(ptr addrspace(1) nocapture readnone %arg, ptr addrspace(1) noalias %arg1) {
-  bb:
-    %tmp = load i32, ptr addrspace(1) null, align 4
-    call void @func(i32 undef)
-    call void @func(i32 %tmp)
-    unreachable
-  }
-
-  define void @stack-slot-share-unequal-sized-spills-with-large-spill-first(ptr addrspace(1) nocapture readnone %arg, ptr addrspace(1) noalias %arg1) {
-  bb:
-    %tmp = load i32, ptr addrspace(1) null, align 4
-    call void @func(i32 undef)
-    call void @func(i32 %tmp)
-    unreachable
-  }
-
-  define void @stack-slot-share-unequal-sized-spills-with-small-spill-first(ptr addrspace(1) nocapture readnone %arg, ptr addrspace(1) noalias %arg1) {
-  bb:
-    %tmp = load i32, ptr addrspace(1) null, align 4
-    call void @func(i32 undef)
-    call void @func(i32 %tmp)
-    unreachable
-  }
-
-  declare void @func(i32)
-...
-
 ---
 name:            stack-slot-share-equal-sized-spills
+alignment:       1
 tracksRegLiveness: true
+tracksDebugUserValues: true
+registers:
+  - { id: 0, class: sreg_32_xm0_xexec }
+  - { id: 1, class: sreg_64_xexec }
+  - { id: 2, class: vreg_64 }
+  - { id: 3, class: vgpr_32, preferred-register: '$vgpr0' }
+  - { id: 4, class: sreg_64 }
+  - { id: 5, class: sreg_32_xm0_xexec }
+  - { id: 6, class: sreg_64_xexec }
+  - { id: 7, class: sreg_64 }
+  - { id: 8, class: sreg_64 }
+  - { id: 9, class: sreg_64 }
 frameInfo:
+  maxAlignment:    4
   adjustsStack:    true
   hasCalls:        true
+stack:
+  - { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
+  - { id: 1, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
+  - { id: 2, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+  - { id: 3, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+  - { id: 4, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
 machineFunctionInfo:
-  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
-  frameOffsetReg: $sgpr32
-  stackPtrOffsetReg: $sgpr32
+  maxKernArgAlign: 1
+  hasSpilledSGPRs: true
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr32'
+  stackPtrOffsetReg: '$sgpr32'
+  argumentInfo:
+    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+    dispatchPtr:     { reg: '$sgpr4_sgpr5' }
+    queuePtr:        { reg: '$sgpr6_sgpr7' }
+    dispatchID:      { reg: '$sgpr10_sgpr11' }
+    workGroupIDX:    { reg: '$sgpr12' }
+    workGroupIDY:    { reg: '$sgpr13' }
+    workGroupIDZ:    { reg: '$sgpr14' }
+    LDSKernelId:     { reg: '$sgpr15' }
+    implicitArgPtr:  { reg: '$sgpr8_sgpr9' }
+    workItemIDX:     { reg: '$vgpr31', mask: 1023 }
+    workItemIDY:     { reg: '$vgpr31', mask: 1047552 }
+    workItemIDZ:     { reg: '$vgpr31', mask: 1072693248 }
+  occupancy:       8
+  vgprForAGPRCopy: '$vgpr63'
 body:             |
   bb.0:
     ; SGPR_SPILLED-LABEL: name: stack-slot-share-equal-sized-spills
@@ -59,13 +68,13 @@ body:             |
     ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr1, 2, [[DEF]], implicit $sgpr0_sgpr1
     ; SGPR_SPILLED-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
     ; SGPR_SPILLED-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr
-    ; SGPR_SPILLED-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+    ; SGPR_SPILLED-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
     ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 3, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
     ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 4, [[DEF]], implicit killed $sgpr4_sgpr5
     ; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
     ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3, implicit-def $sgpr4_sgpr5
     ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
-    ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit undef $vgpr0
+    ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
     ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
     ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 5, [[DEF]]
     ; SGPR_SPILLED-NEXT: $sgpr0 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1, implicit-def $sgpr0_sgpr1
@@ -77,41 +86,81 @@ body:             |
     ; SGPR_SPILLED-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
     ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3, implicit-def $sgpr4_sgpr5
     ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
-    ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit killed $vgpr0
+    ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
     ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5
     ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6, implicit-def $sgpr2_sgpr3
     ; SGPR_SPILLED-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 7
     ; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-    %0:sreg_32_xm0 = COPY $sgpr32
-    %5:sreg_64 = COPY $sgpr0_sgpr1
-    %1:vreg_64 = IMPLICIT_DEF
-    %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr
-    %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+    SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.2, addrspace 5)
+    SI_SPILL_S64_SAVE $sgpr0_sgpr1, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.1, align 4, addrspace 5)
+    %2:vreg_64 = IMPLICIT_DEF
+    %3:vgpr_32 = FLAT_LOAD_DWORD %2, 0, 0, implicit $exec, implicit $flat_scr
+    renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
+    SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
     ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-    dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit undef $vgpr0
-    $sgpr32 = COPY %0
-    %4:sreg_32_xm0 = COPY $sgpr32
-    $sgpr0_sgpr1 = COPY %5
-    %6:sreg_64 = COPY $sgpr2_sgpr3
+    renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
+    dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
+    $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5)
+    SI_SPILL_S32_SAVE $sgpr32, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.3, addrspace 5)
+    $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5)
+    SI_SPILL_S64_SAVE $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5)
     ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
     ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-    $vgpr0 = COPY %2
-    dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit killed $vgpr0
-    $sgpr32 = COPY %4
-    $sgpr2_sgpr3 = COPY %6
+    $vgpr0 = COPY %3
+    renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
+    dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
+    $sgpr32 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.3, addrspace 5)
+    $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5)
     ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-...
 
+...
 ---
 name:            stack-slot-share-unequal-sized-spills-with-large-spill-first
+alignment:       1
 tracksRegLiveness: true
+tracksDebugUserValues: true
+registers:
+  - { id: 0, class: sreg_32_xm0_xexec }
+  - { id: 1, class: sreg_64_xexec }
+  - { id: 2, class: vreg_64 }
+  - { id: 3, class: vgpr_32, preferred-register: '$vgpr0' }
+  - { id: 4, class: sreg_64 }
+  - { id: 5, class: sreg_32_xm0_xexec }
+  - { id: 6, class: sreg_32_xm0_xexec }
+  - { id: 7, class: sreg_64 }
+  - { id: 8, class: sreg_64 }
+  - { id: 9, class: sreg_64 }
 frameInfo:
+  maxAlignment:    4
   adjustsStack:    true
   hasCalls:        true
+stack:
+  - { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
+  - { id: 1, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
+  - { id: 2, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+  - { id: 3, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+  - { id: 4, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
 machineFunctionInfo:
-  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
-  frameOffsetReg: $sgpr32
-  stackPtrOffsetReg: $sgpr32
+  maxKernArgAlign: 1
+  hasSpilledSGPRs: true
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr32'
+  stackPtrOffsetReg: '$sgpr32'
+  argumentInfo:
+    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+    dispatchPtr:     { reg: '$sgpr4_sgpr5' }
+    queuePtr:        { reg: '$sgpr6_sgpr7' }
+    dispatchID:      { reg: '$sgpr10_sgpr11' }
+    workGroupIDX:    { reg: '$sgpr12' }
+    workGroupIDY:    { reg: '$sgpr13' }
+    workGroupIDZ:    { reg: '$sgpr14' }
+    LDSKernelId:     { reg: '$sgpr15' }
+    implicitArgPtr:  { reg: '$sgpr8_sgpr9' }
+    workItemIDX:     { reg: '$vgpr31', mask: 1023 }
+    workItemIDY:     { reg: '$vgpr31', mask: 1047552 }
+    workItemIDZ:     { reg: '$vgpr31', mask: 1072693248 }
+  occupancy:       8
+  vgprForAGPRCopy: '$vgpr63'
 body:             |
   bb.0:
     ; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-large-spill-first
@@ -125,13 +174,13 @@ body:             |
     ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 2, [[DEF]], implicit $sgpr2_sgpr3
     ; SGPR_SPILLED-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
     ; SGPR_SPILLED-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr
-    ; SGPR_SPILLED-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+    ; SGPR_SPILLED-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
     ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 3, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
     ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 4, [[DEF]], implicit killed $sgpr4_sgpr5
     ; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
     ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3, implicit-def $sgpr4_sgpr5
     ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
-    ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit undef $vgpr0
+    ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
     ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
     ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 5, [[DEF]]
     ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1, implicit-def $sgpr2_sgpr3
@@ -142,40 +191,80 @@ body:             |
     ; SGPR_SPILLED-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
     ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3, implicit-def $sgpr4_sgpr5
     ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
-    ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit killed $vgpr0
+    ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
     ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5
     ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
     ; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-    %0:sreg_32_xm0 = COPY $sgpr32
-    %5:sreg_64 = COPY $sgpr2_sgpr3
-    %1:vreg_64 = IMPLICIT_DEF
-    %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr
-    %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+    SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.2, addrspace 5)
+    SI_SPILL_S64_SAVE $sgpr2_sgpr3, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.1, align 4, addrspace 5)
+    %2:vreg_64 = IMPLICIT_DEF
+    %3:vgpr_32 = FLAT_LOAD_DWORD %2, 0, 0, implicit $exec, implicit $flat_scr
+    renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
+    SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
     ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-    dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit undef $vgpr0
-    $sgpr32 = COPY %0
-    %4:sreg_32_xm0 = COPY $sgpr32
-    $sgpr2_sgpr3 = COPY %5
-    %6:sreg_32 = COPY $sgpr2
+    renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
+    dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
+    $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5)
+    SI_SPILL_S32_SAVE $sgpr32, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.3, addrspace 5)
+    $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5)
+    SI_SPILL_S32_SAVE $sgpr2, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.4, addrspace 5)
     ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
     ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-    $vgpr0 = COPY %2
-    dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit killed $vgpr0
-    $sgpr32 = COPY %4
-    $sgpr2 = COPY %6
+    $vgpr0 = COPY %3
+    renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
+    dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
+    $sgpr32 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.3, addrspace 5)
+    $sgpr2 = SI_SPILL_S32_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.4, addrspace 5)
     ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-...
 
+...
 ---
 name:            stack-slot-share-unequal-sized-spills-with-small-spill-first
+alignment:       1
 tracksRegLiveness: true
+tracksDebugUserValues: true
+registers:
+  - { id: 0, class: sreg_32_xm0_xexec }
+  - { id: 1, class: sreg_32_xm0_xexec }
+  - { id: 2, class: vreg_64 }
+  - { id: 3, class: vgpr_32, preferred-register: '$vgpr0' }
+  - { id: 4, class: sreg_64 }
+  - { id: 5, class: sreg_32_xm0_xexec }
+  - { id: 6, class: sreg_64_xexec }
+  - { id: 7, class: sreg_64 }
+  - { id: 8, class: sreg_64 }
+  - { id: 9, class: sreg_64 }
 frameInfo:
+  maxAlignment:    4
   adjustsStack:    true
   hasCalls:        true
+stack:
+  - { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
+  - { id: 1, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+  - { id: 2, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+  - { id: 3, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+  - { id: 4, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
 machineFunctionInfo:
-  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
-  frameOffsetReg: $sgpr32
-  stackPtrOffsetReg: $sgpr32
+  maxKernArgAlign: 1
+  hasSpilledSGPRs: true
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr32'
+  stackPtrOffsetReg: '$sgpr32'
+  argumentInfo:
+    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+    dispatchPtr:     { reg: '$sgpr4_sgpr5' }
+    queuePtr:        { reg: '$sgpr6_sgpr7' }
+    dispatchID:      { reg: '$sgpr10_sgpr11' }
+    workGroupIDX:    { reg: '$sgpr12' }
+    workGroupIDY:    { reg: '$sgpr13' }
+    workGroupIDZ:    { reg: '$sgpr14' }
+    LDSKernelId:     { reg: '$sgpr15' }
+    implicitArgPtr:  { reg: '$sgpr8_sgpr9' }
+    workItemIDX:     { reg: '$vgpr31', mask: 1023 }
+    workItemIDY:     { reg: '$vgpr31', mask: 1047552 }
+    workItemIDZ:     { reg: '$vgpr31', mask: 1072693248 }
+  occupancy:       8
+  vgprForAGPRCopy: '$vgpr63'
 body:             |
   bb.0:
     ; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-small-spill-first
@@ -188,13 +277,13 @@ body:             |
     ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr0, 1, [[DEF]]
     ; SGPR_SPILLED-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
     ; SGPR_SPILLED-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr
-    ; SGPR_SPILLED-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+    ; SGPR_SPILLED-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
     ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 2, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
     ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 3, [[DEF]], implicit killed $sgpr4_sgpr5
     ; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
     ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2, implicit-def $sgpr4_sgpr5
     ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3
-    ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit undef $vgpr0
+    ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
     ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
     ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 4, [[DEF]]
     ; SGPR_SPILLED-NEXT: $sgpr0 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1
@@ -205,27 +294,31 @@ body:             |
     ; SGPR_SPILLED-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
     ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2, implicit-def $sgpr4_sgpr5
     ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3
-    ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit killed $vgpr0
+    ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
     ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
     ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5, implicit-def $sgpr2_sgpr3
     ; SGPR_SPILLED-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
     ; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-    %0:sreg_32_xm0 = COPY $sgpr32
-    %5:sreg_32 = COPY $sgpr0
-    %1:vreg_64 = IMPLICIT_DEF
-    %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr
-    %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+    SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.2, addrspace 5)
+    SI_SPILL_S32_SAVE $sgpr0, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.1, addrspace 5)
+    %2:vreg_64 = IMPLICIT_DEF
+    %3:vgpr_32 = FLAT_LOAD_DWORD %2, 0, 0, implicit $exec, implicit $flat_scr
+    renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
+    SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
     ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-    dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit undef $vgpr0
-    $sgpr32 = COPY %0
-    %4:sreg_32_xm0 = COPY $sgpr32
-    $sgpr0 = COPY %5
-    %6:sreg_64 = COPY $sgpr2_sgpr3
+    renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
+    dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
+    $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5)
+    SI_SPILL_S32_SAVE $sgpr32, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.3, addrspace 5)
+    $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5)
+    SI_SPILL_S64_SAVE $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5)
     ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
     ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-    $vgpr0 = COPY %2
-    dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu, implicit killed $vgpr0
-    $sgpr32 = COPY %4
-    $sgpr2_sgpr3 = COPY %6
+    $vgpr0 = COPY %3
+    renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
+    dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
+    $sgpr32 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.3, addrspace 5)
+    $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5)
     ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+
 ...

>From 71be145a75cae4ecb1611352ccd7e8060f89eef5 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Fri, 7 Jun 2024 15:07:38 +0530
Subject: [PATCH 3/6] Removed unnecessary machineInfo from MIR LIT test.

---
 .../si-lower-sgpr-spills-vgpr-lanes-usage.mir | 93 -------------------
 1 file changed, 93 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
index 13df1672fb621..80b7b335584fb 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
@@ -10,24 +10,9 @@
 
 ---
 name:            stack-slot-share-equal-sized-spills
-alignment:       1
 tracksRegLiveness: true
-tracksDebugUserValues: true
-registers:
-  - { id: 0, class: sreg_32_xm0_xexec }
-  - { id: 1, class: sreg_64_xexec }
-  - { id: 2, class: vreg_64 }
-  - { id: 3, class: vgpr_32, preferred-register: '$vgpr0' }
-  - { id: 4, class: sreg_64 }
-  - { id: 5, class: sreg_32_xm0_xexec }
-  - { id: 6, class: sreg_64_xexec }
-  - { id: 7, class: sreg_64 }
-  - { id: 8, class: sreg_64 }
-  - { id: 9, class: sreg_64 }
 frameInfo:
-  maxAlignment:    4
   adjustsStack:    true
-  hasCalls:        true
 stack:
   - { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
   - { id: 1, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
@@ -35,26 +20,10 @@ stack:
   - { id: 3, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
   - { id: 4, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
 machineFunctionInfo:
-  maxKernArgAlign: 1
   hasSpilledSGPRs: true
   scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
   frameOffsetReg:  '$sgpr32'
   stackPtrOffsetReg: '$sgpr32'
-  argumentInfo:
-    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
-    dispatchPtr:     { reg: '$sgpr4_sgpr5' }
-    queuePtr:        { reg: '$sgpr6_sgpr7' }
-    dispatchID:      { reg: '$sgpr10_sgpr11' }
-    workGroupIDX:    { reg: '$sgpr12' }
-    workGroupIDY:    { reg: '$sgpr13' }
-    workGroupIDZ:    { reg: '$sgpr14' }
-    LDSKernelId:     { reg: '$sgpr15' }
-    implicitArgPtr:  { reg: '$sgpr8_sgpr9' }
-    workItemIDX:     { reg: '$vgpr31', mask: 1023 }
-    workItemIDY:     { reg: '$vgpr31', mask: 1047552 }
-    workItemIDZ:     { reg: '$vgpr31', mask: 1072693248 }
-  occupancy:       8
-  vgprForAGPRCopy: '$vgpr63'
 body:             |
   bb.0:
     ; SGPR_SPILLED-LABEL: name: stack-slot-share-equal-sized-spills
@@ -116,24 +85,9 @@ body:             |
 ...
 ---
 name:            stack-slot-share-unequal-sized-spills-with-large-spill-first
-alignment:       1
 tracksRegLiveness: true
-tracksDebugUserValues: true
-registers:
-  - { id: 0, class: sreg_32_xm0_xexec }
-  - { id: 1, class: sreg_64_xexec }
-  - { id: 2, class: vreg_64 }
-  - { id: 3, class: vgpr_32, preferred-register: '$vgpr0' }
-  - { id: 4, class: sreg_64 }
-  - { id: 5, class: sreg_32_xm0_xexec }
-  - { id: 6, class: sreg_32_xm0_xexec }
-  - { id: 7, class: sreg_64 }
-  - { id: 8, class: sreg_64 }
-  - { id: 9, class: sreg_64 }
 frameInfo:
-  maxAlignment:    4
   adjustsStack:    true
-  hasCalls:        true
 stack:
   - { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
   - { id: 1, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
@@ -141,26 +95,10 @@ stack:
   - { id: 3, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
   - { id: 4, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
 machineFunctionInfo:
-  maxKernArgAlign: 1
   hasSpilledSGPRs: true
   scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
   frameOffsetReg:  '$sgpr32'
   stackPtrOffsetReg: '$sgpr32'
-  argumentInfo:
-    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
-    dispatchPtr:     { reg: '$sgpr4_sgpr5' }
-    queuePtr:        { reg: '$sgpr6_sgpr7' }
-    dispatchID:      { reg: '$sgpr10_sgpr11' }
-    workGroupIDX:    { reg: '$sgpr12' }
-    workGroupIDY:    { reg: '$sgpr13' }
-    workGroupIDZ:    { reg: '$sgpr14' }
-    LDSKernelId:     { reg: '$sgpr15' }
-    implicitArgPtr:  { reg: '$sgpr8_sgpr9' }
-    workItemIDX:     { reg: '$vgpr31', mask: 1023 }
-    workItemIDY:     { reg: '$vgpr31', mask: 1047552 }
-    workItemIDZ:     { reg: '$vgpr31', mask: 1072693248 }
-  occupancy:       8
-  vgprForAGPRCopy: '$vgpr63'
 body:             |
   bb.0:
     ; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-large-spill-first
@@ -220,24 +158,9 @@ body:             |
 ...
 ---
 name:            stack-slot-share-unequal-sized-spills-with-small-spill-first
-alignment:       1
 tracksRegLiveness: true
-tracksDebugUserValues: true
-registers:
-  - { id: 0, class: sreg_32_xm0_xexec }
-  - { id: 1, class: sreg_32_xm0_xexec }
-  - { id: 2, class: vreg_64 }
-  - { id: 3, class: vgpr_32, preferred-register: '$vgpr0' }
-  - { id: 4, class: sreg_64 }
-  - { id: 5, class: sreg_32_xm0_xexec }
-  - { id: 6, class: sreg_64_xexec }
-  - { id: 7, class: sreg_64 }
-  - { id: 8, class: sreg_64 }
-  - { id: 9, class: sreg_64 }
 frameInfo:
-  maxAlignment:    4
   adjustsStack:    true
-  hasCalls:        true
 stack:
   - { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
   - { id: 1, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
@@ -245,26 +168,10 @@ stack:
   - { id: 3, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
   - { id: 4, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
 machineFunctionInfo:
-  maxKernArgAlign: 1
   hasSpilledSGPRs: true
   scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
   frameOffsetReg:  '$sgpr32'
   stackPtrOffsetReg: '$sgpr32'
-  argumentInfo:
-    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
-    dispatchPtr:     { reg: '$sgpr4_sgpr5' }
-    queuePtr:        { reg: '$sgpr6_sgpr7' }
-    dispatchID:      { reg: '$sgpr10_sgpr11' }
-    workGroupIDX:    { reg: '$sgpr12' }
-    workGroupIDY:    { reg: '$sgpr13' }
-    workGroupIDZ:    { reg: '$sgpr14' }
-    LDSKernelId:     { reg: '$sgpr15' }
-    implicitArgPtr:  { reg: '$sgpr8_sgpr9' }
-    workItemIDX:     { reg: '$vgpr31', mask: 1023 }
-    workItemIDY:     { reg: '$vgpr31', mask: 1047552 }
-    workItemIDZ:     { reg: '$vgpr31', mask: 1072693248 }
-  occupancy:       8
-  vgprForAGPRCopy: '$vgpr63'
 body:             |
   bb.0:
     ; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-small-spill-first

>From c99f20e8b89543a1d3a2d2d33da763fe49931873 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Tue, 11 Jun 2024 12:35:26 +0530
Subject: [PATCH 4/6] Changed MIR test to add dependency from RA

This test case exercises post-optimization SGPR spills, which depend on the StackSlotColoring pass, which in turn uses the LiveStack (LS) analysis results. In the current scenario, the LS analysis is only carried out during the register allocation (RA) phase, so RA must be invoked in the test for the LS results to be computed.
---
 .../si-lower-sgpr-spills-vgpr-lanes-usage.mir | 142 +++++++-----------
 1 file changed, 57 insertions(+), 85 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
index 80b7b335584fb..9d6913e078c21 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILLED %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -stress-regalloc=3 -start-before=greedy -stop-after=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILLED %s
 
 # This file contains 3 test cases to observe the optimal stack slot usage for SGPR spills utilizing Stack Slot Coloring pass.
 # @stack-slot-share-equal-sized-spills : In this, the stack slot indices is shared among the spill stack objects of equal size.
@@ -13,17 +13,11 @@ name:            stack-slot-share-equal-sized-spills
 tracksRegLiveness: true
 frameInfo:
   adjustsStack:    true
-stack:
-  - { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
-  - { id: 1, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
-  - { id: 2, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
-  - { id: 3, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
-  - { id: 4, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
+  hasCalls:        true
 machineFunctionInfo:
-  hasSpilledSGPRs: true
-  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
-  frameOffsetReg:  '$sgpr32'
-  stackPtrOffsetReg: '$sgpr32'
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  frameOffsetReg: $sgpr32
+  stackPtrOffsetReg: $sgpr32
 body:             |
   bb.0:
     ; SGPR_SPILLED-LABEL: name: stack-slot-share-equal-sized-spills
@@ -60,45 +54,36 @@ body:             |
     ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6, implicit-def $sgpr2_sgpr3
     ; SGPR_SPILLED-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 7
     ; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-    SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.2, addrspace 5)
-    SI_SPILL_S64_SAVE $sgpr0_sgpr1, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.1, align 4, addrspace 5)
-    %2:vreg_64 = IMPLICIT_DEF
-    %3:vgpr_32 = FLAT_LOAD_DWORD %2, 0, 0, implicit $exec, implicit $flat_scr
-    renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
-    SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
+    %0:sreg_32_xm0 = COPY $sgpr32
+    %5:sreg_64 = COPY $sgpr0_sgpr1
+    %1:vreg_64 = IMPLICIT_DEF
+    %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr
+    %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
     ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-    renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
-    dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
-    $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5)
-    SI_SPILL_S32_SAVE $sgpr32, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.3, addrspace 5)
-    $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5)
-    SI_SPILL_S64_SAVE $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5)
+    dead $sgpr30_sgpr31 = SI_CALL %3, 0, csr_amdgpu, implicit undef $vgpr0
+    $sgpr32 = COPY %0
+    %4:sreg_32_xm0 = COPY $sgpr32
+    $sgpr0_sgpr1 = COPY %5
+    %6:sreg_64 = COPY $sgpr2_sgpr3
     ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
     ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-    $vgpr0 = COPY %3
-    renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
-    dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
-    $sgpr32 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.3, addrspace 5)
-    $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5)
+    $vgpr0 = COPY %2
+    dead $sgpr30_sgpr31 = SI_CALL %3, 0, csr_amdgpu, implicit killed $vgpr0
+    $sgpr32 = COPY %4
+    $sgpr2_sgpr3 = COPY %6
     ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-
 ...
+
 ---
 name:            stack-slot-share-unequal-sized-spills-with-large-spill-first
 tracksRegLiveness: true
 frameInfo:
   adjustsStack:    true
-stack:
-  - { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
-  - { id: 1, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
-  - { id: 2, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
-  - { id: 3, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
-  - { id: 4, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+  hasCalls:        true
 machineFunctionInfo:
-  hasSpilledSGPRs: true
-  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
-  frameOffsetReg:  '$sgpr32'
-  stackPtrOffsetReg: '$sgpr32'
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  frameOffsetReg: $sgpr32
+  stackPtrOffsetReg: $sgpr32
 body:             |
   bb.0:
     ; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-large-spill-first
@@ -133,45 +118,36 @@ body:             |
     ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5
     ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
     ; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-    SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.2, addrspace 5)
-    SI_SPILL_S64_SAVE $sgpr2_sgpr3, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.1, align 4, addrspace 5)
-    %2:vreg_64 = IMPLICIT_DEF
-    %3:vgpr_32 = FLAT_LOAD_DWORD %2, 0, 0, implicit $exec, implicit $flat_scr
-    renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
-    SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
+    %0:sreg_32_xm0 = COPY $sgpr32
+    %5:sreg_64 = COPY $sgpr2_sgpr3
+    %1:vreg_64 = IMPLICIT_DEF
+    %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr
+    %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
     ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-    renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
-    dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
-    $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5)
-    SI_SPILL_S32_SAVE $sgpr32, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.3, addrspace 5)
-    $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5)
-    SI_SPILL_S32_SAVE $sgpr2, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.4, addrspace 5)
+    dead $sgpr30_sgpr31 = SI_CALL %3, 0, csr_amdgpu, implicit undef $vgpr0
+    $sgpr32 = COPY %0
+    %4:sreg_32_xm0 = COPY $sgpr32
+    $sgpr2_sgpr3 = COPY %5
+    %6:sreg_32 = COPY $sgpr2
     ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
     ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-    $vgpr0 = COPY %3
-    renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
-    dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
-    $sgpr32 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.3, addrspace 5)
-    $sgpr2 = SI_SPILL_S32_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.4, addrspace 5)
+    $vgpr0 = COPY %2
+    dead $sgpr30_sgpr31 = SI_CALL %3, 0, csr_amdgpu, implicit killed $vgpr0
+    $sgpr32 = COPY %4
+    $sgpr2 = COPY %6
     ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-
 ...
+
 ---
 name:            stack-slot-share-unequal-sized-spills-with-small-spill-first
 tracksRegLiveness: true
 frameInfo:
   adjustsStack:    true
-stack:
-  - { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
-  - { id: 1, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
-  - { id: 2, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
-  - { id: 3, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
-  - { id: 4, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
+  hasCalls:        true
 machineFunctionInfo:
-  hasSpilledSGPRs: true
-  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
-  frameOffsetReg:  '$sgpr32'
-  stackPtrOffsetReg: '$sgpr32'
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  frameOffsetReg: $sgpr32
+  stackPtrOffsetReg: $sgpr32
 body:             |
   bb.0:
     ; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-small-spill-first
@@ -206,26 +182,22 @@ body:             |
     ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5, implicit-def $sgpr2_sgpr3
     ; SGPR_SPILLED-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
     ; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-    SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.2, addrspace 5)
-    SI_SPILL_S32_SAVE $sgpr0, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.1, addrspace 5)
-    %2:vreg_64 = IMPLICIT_DEF
-    %3:vgpr_32 = FLAT_LOAD_DWORD %2, 0, 0, implicit $exec, implicit $flat_scr
-    renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
-    SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
+    %0:sreg_32_xm0 = COPY $sgpr32
+    %5:sreg_32 = COPY $sgpr0
+    %1:vreg_64 = IMPLICIT_DEF
+    %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr
+    %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
     ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-    renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
-    dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
-    $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5)
-    SI_SPILL_S32_SAVE $sgpr32, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.3, addrspace 5)
-    $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5)
-    SI_SPILL_S64_SAVE $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5)
+    dead $sgpr30_sgpr31 = SI_CALL %3, 0, csr_amdgpu, implicit undef $vgpr0
+    $sgpr32 = COPY %0
+    %4:sreg_32_xm0 = COPY $sgpr32
+    $sgpr0 = COPY %5
+    %6:sreg_64 = COPY $sgpr2_sgpr3
     ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
     ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-    $vgpr0 = COPY %3
-    renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
-    dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
-    $sgpr32 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.3, addrspace 5)
-    $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5)
+    $vgpr0 = COPY %2
+    dead $sgpr30_sgpr31 = SI_CALL %3, 0, csr_amdgpu, implicit killed $vgpr0
+    $sgpr32 = COPY %4
+    $sgpr2_sgpr3 = COPY %6
     ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
-
 ...

>From e80b5d08be7c49a913bc6265f58c00359d5150d9 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Mon, 17 Jun 2024 11:08:19 +0530
Subject: [PATCH 5/6] Added flag to indicate test invocation before SGPR
 allocation.

---
 .../CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
index 9d6913e078c21..8464ffa20d9a7 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
@@ -1,5 +1,7 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -stress-regalloc=3 -start-before=greedy -stop-after=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILLED %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -stress-regalloc=3 -start-before=greedy,0 -stop-after=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILLED %s
+
+# INFO : The pass pipeline is started from before the first iteration of register allocation pass that represents SGPR allocation in AMDGPU pipleine.
 
 # This file contains 3 test cases to observe the optimal stack slot usage for SGPR spills utilizing Stack Slot Coloring pass.
 # @stack-slot-share-equal-sized-spills : In this, the stack slot indices is shared among the spill stack objects of equal size.
@@ -8,6 +10,7 @@
 # In the remaining 2 test cases mentioned in just above 2 lines, the stack slot indices is shared among the spill stack objects of
 # unequal size, with spill slot having the size of the largest of the stack objects sharing the common stack indices.
 
+
 ---
 name:            stack-slot-share-equal-sized-spills
 tracksRegLiveness: true

>From ab2ad035c0360fe33c809c624c571e9e37ad529d Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Mon, 17 Jun 2024 11:22:07 +0530
Subject: [PATCH 6/6] Addressed changes in comments.

---
 .../CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir   | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
index 8464ffa20d9a7..887e9c4b5dc5e 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
@@ -1,7 +1,7 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -stress-regalloc=3 -start-before=greedy,0 -stop-after=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILLED %s
 
-# INFO : The pass pipeline is started from before the first iteration of register allocation pass that represents SGPR allocation in AMDGPU pipleine.
+# INFO : The test starts from the sgpr-regalloc pipeline.
 
 # This file contains 3 test cases to observe the optimal stack slot usage for SGPR spills utilizing Stack Slot Coloring pass.
 # @stack-slot-share-equal-sized-spills : In this, the stack slot indices is shared among the spill stack objects of equal size.
@@ -10,7 +10,6 @@
 # In the remaining 2 test cases mentioned in just above 2 lines, the stack slot indices is shared among the spill stack objects of
 # unequal size, with spill slot having the size of the largest of the stack objects sharing the common stack indices.
 
-
 ---
 name:            stack-slot-share-equal-sized-spills
 tracksRegLiveness: true



More information about the llvm-commits mailing list