[llvm] [NFC][AMDGPU] Autogenerating test cases (PR #124507)

via llvm-commits llvm-commits at lists.llvm.org
Sun Jan 26 21:47:27 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Aaditya (easyonaadit)

<details>
<summary>Changes</summary>



---

Patch is 180.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124507.diff


3 Files Affected:

- (modified) llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll (+2406-394) 
- (modified) llvm/test/CodeGen/AMDGPU/nested-calls.ll (+64-32) 
- (modified) llvm/test/CodeGen/AMDGPU/sibling-call.ll (+568-232) 


``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index 6fb071dd42d2ff..3241a76d46a1e0 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -1,123 +1,177 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck  -enable-var-scope -check-prefixes=GCN,MUBUF %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck  -enable-var-scope -check-prefixes=GCN,MUBUF %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-flat-scratch < %s | FileCheck  -enable-var-scope -check-prefixes=GCN,FLATSCR %s
 
-; GCN-LABEL: {{^}}callee_no_stack:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
 define void @callee_no_stack() #0 {
+; GCN-LABEL: callee_no_stack:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   ret void
 }
 
-; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim_all:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt
-; MUBUF-NEXT:   s_mov_b32 [[FP_COPY:s4]], s33
-; FLATSCR-NEXT: s_mov_b32 [[FP_COPY:s0]], s33
-; GCN-NEXT: s_mov_b32 s33, s32
-; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
-; GCN-NEXT: s_setpc_b64
 define void @callee_no_stack_no_fp_elim_all() #1 {
+; MUBUF-LABEL: callee_no_stack_no_fp_elim_all:
+; MUBUF:       ; %bb.0:
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    s_mov_b32 s4, s33
+; MUBUF-NEXT:    s_mov_b32 s33, s32
+; MUBUF-NEXT:    s_mov_b32 s33, s4
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: callee_no_stack_no_fp_elim_all:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    s_mov_b32 s0, s33
+; FLATSCR-NEXT:    s_mov_b32 s33, s32
+; FLATSCR-NEXT:    s_mov_b32 s33, s0
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
   ret void
 }
 
-; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim_nonleaf:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
 define void @callee_no_stack_no_fp_elim_nonleaf() #2 {
+; GCN-LABEL: callee_no_stack_no_fp_elim_nonleaf:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   ret void
 }
 
-; GCN-LABEL: {{^}}callee_with_stack:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
-; MUBUF-NEXT:   buffer_store_dword v0, off, s[0:3], s32{{$}}
-; FLATSCR-NEXT: scratch_store_dword off, v0, s32
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
 define void @callee_with_stack() #0 {
+; MUBUF-LABEL: callee_with_stack:
+; MUBUF:       ; %bb.0:
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    v_mov_b32_e32 v0, 0
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: callee_with_stack:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    v_mov_b32_e32 v0, 0
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s32
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
   %alloca = alloca i32, addrspace(5)
   store volatile i32 0, ptr addrspace(5) %alloca
   ret void
 }
 
 ; Can use free call clobbered register to preserve original FP value.
-
-; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_all:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt
-; MUBUF-NEXT:   s_mov_b32 [[FP_COPY:s4]], s33
-; FLATSCR-NEXT: s_mov_b32 [[FP_COPY:s0]], s33
-; GCN-NEXT: s_mov_b32 s33, s32
-; MUBUF-NEXT:   s_addk_i32 s32, 0x200
-; FLATSCR-NEXT: s_add_i32 s32, s32, 8
-; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
-; MUBUF-NEXT:   buffer_store_dword v0, off, s[0:3], s33{{$}}
-; FLATSCR-NEXT: scratch_store_dword off, v0, s33{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; MUBUF-NEXT:   s_mov_b32 s32, s33
-; FLATSCR-NEXT: s_mov_b32 s32, s33
-; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
-; GCN-NEXT: s_setpc_b64
 define void @callee_with_stack_no_fp_elim_all() #1 {
+; MUBUF-LABEL: callee_with_stack_no_fp_elim_all:
+; MUBUF:       ; %bb.0:
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    s_mov_b32 s4, s33
+; MUBUF-NEXT:    s_mov_b32 s33, s32
+; MUBUF-NEXT:    s_addk_i32 s32, 0x200
+; MUBUF-NEXT:    v_mov_b32_e32 v0, 0
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s33
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_mov_b32 s32, s33
+; MUBUF-NEXT:    s_mov_b32 s33, s4
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: callee_with_stack_no_fp_elim_all:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    s_mov_b32 s0, s33
+; FLATSCR-NEXT:    s_mov_b32 s33, s32
+; FLATSCR-NEXT:    s_add_i32 s32, s32, 8
+; FLATSCR-NEXT:    v_mov_b32_e32 v0, 0
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s33
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_mov_b32 s32, s33
+; FLATSCR-NEXT:    s_mov_b32 s33, s0
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
   %alloca = alloca i32, addrspace(5)
   store volatile i32 0, ptr addrspace(5) %alloca
   ret void
 }
 
-; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_non_leaf:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
-; MUBUF-NEXT:   buffer_store_dword v0, off, s[0:3], s32{{$}}
-; FLATSCR-NEXT: scratch_store_dword off, v0, s32{{$}}
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
 define void @callee_with_stack_no_fp_elim_non_leaf() #2 {
+; MUBUF-LABEL: callee_with_stack_no_fp_elim_non_leaf:
+; MUBUF:       ; %bb.0:
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    v_mov_b32_e32 v0, 0
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: callee_with_stack_no_fp_elim_non_leaf:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    v_mov_b32_e32 v0, 0
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s32
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
   %alloca = alloca i32, addrspace(5)
   store volatile i32 0, ptr addrspace(5) %alloca
   ret void
 }
 
-; GCN-LABEL: {{^}}callee_with_stack_and_call:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
-; GCN-NEXT: s_mov_b32 s33, s32
-; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT:   buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s33 offset:4 ; 4-byte Folded Spill
-; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
-; GCN: v_writelane_b32 [[CSR_VGPR]], [[FP_SCRATCH_COPY]], 2
-; MUBUF-DAG:   s_addk_i32 s32, 0x400{{$}}
-; FLATSCR-DAG: s_add_i32 s32, s32, 16{{$}}
-; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30,
-; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31,
-
-; MUBUF-DAG:   buffer_store_dword [[ZERO]], off, s[0:3], s33{{$}}
-; FLATSCR-DAG: scratch_store_dword off, [[ZERO]], s33{{$}}
-
-; GCN: s_swappc_b64
-
-; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]]
-; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]]
-
-; MUBUF:    s_mov_b32 s32, s33{{$}}
-; FLATSCR:  s_mov_b32 s32, s33{{$}}
-; GCN-NEXT: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], [[CSR_VGPR]], 2
-; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT:   buffer_load_dword [[CSR_VGPR]], off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s33 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
-; GCN-NEXT: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
-; GCN-NEXT: s_waitcnt vmcnt(0)
-
-; GCN-NEXT: s_setpc_b64 s[30:31]
 define void @callee_with_stack_and_call() #0 {
+; MUBUF-LABEL: callee_with_stack_and_call:
+; MUBUF:       ; %bb.0:
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    s_mov_b32 s16, s33
+; MUBUF-NEXT:    s_mov_b32 s33, s32
+; MUBUF-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; MUBUF-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; MUBUF-NEXT:    s_mov_b64 exec, s[18:19]
+; MUBUF-NEXT:    v_writelane_b32 v40, s16, 2
+; MUBUF-NEXT:    s_addk_i32 s32, 0x400
+; MUBUF-NEXT:    v_writelane_b32 v40, s30, 0
+; MUBUF-NEXT:    v_mov_b32_e32 v0, 0
+; MUBUF-NEXT:    v_writelane_b32 v40, s31, 1
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s33
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_getpc_b64 s[16:17]
+; MUBUF-NEXT:    s_add_u32 s16, s16, external_void_func_void@rel32@lo+4
+; MUBUF-NEXT:    s_addc_u32 s17, s17, external_void_func_void@rel32@hi+12
+; MUBUF-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; MUBUF-NEXT:    v_readlane_b32 s31, v40, 1
+; MUBUF-NEXT:    v_readlane_b32 s30, v40, 0
+; MUBUF-NEXT:    s_mov_b32 s32, s33
+; MUBUF-NEXT:    v_readlane_b32 s4, v40, 2
+; MUBUF-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT:    s_mov_b32 s33, s4
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: callee_with_stack_and_call:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    s_mov_b32 s0, s33
+; FLATSCR-NEXT:    s_mov_b32 s33, s32
+; FLATSCR-NEXT:    s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT:    scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill
+; FLATSCR-NEXT:    s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT:    v_writelane_b32 v40, s0, 2
+; FLATSCR-NEXT:    s_add_i32 s32, s32, 16
+; FLATSCR-NEXT:    v_writelane_b32 v40, s30, 0
+; FLATSCR-NEXT:    v_mov_b32_e32 v0, 0
+; FLATSCR-NEXT:    v_writelane_b32 v40, s31, 1
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s33
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_getpc_b64 s[0:1]
+; FLATSCR-NEXT:    s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT:    s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[0:1]
+; FLATSCR-NEXT:    v_readlane_b32 s31, v40, 1
+; FLATSCR-NEXT:    v_readlane_b32 s30, v40, 0
+; FLATSCR-NEXT:    s_mov_b32 s32, s33
+; FLATSCR-NEXT:    v_readlane_b32 s0, v40, 2
+; FLATSCR-NEXT:    s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT:    scratch_load_dword v40, off, s33 offset:4 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT:    s_mov_b32 s33, s0
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
   %alloca = alloca i32, addrspace(5)
   store volatile i32 0, ptr addrspace(5) %alloca
   call void @external_void_func_void()
@@ -130,36 +184,60 @@ define void @callee_with_stack_and_call() #0 {
 ; There is stack usage only because of the need to evict a VGPR for
 ; spilling CSR SGPRs.
 
-; GCN-LABEL: {{^}}callee_no_stack_with_call:
-; GCN: s_waitcnt
-; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
-; GCN-NEXT: s_mov_b32 s33, s32
-; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT:   buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s33 ; 4-byte Folded Spill
-; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s33 ; 4-byte Folded Spill
-; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
-; MUBUF-DAG:   s_addk_i32 s32, 0x400
-; FLATSCR-DAG: s_add_i32 s32, s32, 16
-; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], [[FP_SCRATCH_COPY]], [[FP_SPILL_LANE:[0-9]+]]
-
-; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, 0
-; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1
-; GCN: s_swappc_b64
-
-; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]], 0
-; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]], 1
-
-; MUBUF:   s_mov_b32 s32, s33
-; FLATSCR: s_mov_b32 s32, s33
-; GCN-NEXT: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], [[CSR_VGPR]], [[FP_SPILL_LANE]]
-; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT:   buffer_load_dword [[CSR_VGPR]], off, s[0:3], s33 ; 4-byte Folded Reload
-; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s33 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
-; GCN-NEXT: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: s_setpc_b64 s[30:31]
 define void @callee_no_stack_with_call() #0 {
+; MUBUF-LABEL: callee_no_stack_with_call:
+; MUBUF:       ; %bb.0:
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    s_mov_b32 s16, s33
+; MUBUF-NEXT:    s_mov_b32 s33, s32
+; MUBUF-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; MUBUF-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; MUBUF-NEXT:    s_mov_b64 exec, s[18:19]
+; MUBUF-NEXT:    v_writelane_b32 v40, s16, 2
+; MUBUF-NEXT:    s_addk_i32 s32, 0x400
+; MUBUF-NEXT:    v_writelane_b32 v40, s30, 0
+; MUBUF-NEXT:    v_writelane_b32 v40, s31, 1
+; MUBUF-NEXT:    s_getpc_b64 s[16:17]
+; MUBUF-NEXT:    s_add_u32 s16, s16, external_void_func_void@rel32@lo+4
+; MUBUF-NEXT:    s_addc_u32 s17, s17, external_void_func_void@rel32@hi+12
+; MUBUF-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; MUBUF-NEXT:    v_readlane_b32 s31, v40, 1
+; MUBUF-NEXT:    v_readlane_b32 s30, v40, 0
+; MUBUF-NEXT:    s_mov_b32 s32, s33
+; MUBUF-NEXT:    v_readlane_b32 s4, v40, 2
+; MUBUF-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT:    s_mov_b32 s33, s4
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: callee_no_stack_with_call:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    s_mov_b32 s0, s33
+; FLATSCR-NEXT:    s_mov_b32 s33, s32
+; FLATSCR-NEXT:    s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT:    scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
+; FLATSCR-NEXT:    s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT:    v_writelane_b32 v40, s0, 2
+; FLATSCR-NEXT:    s_add_i32 s32, s32, 16
+; FLATSCR-NEXT:    v_writelane_b32 v40, s30, 0
+; FLATSCR-NEXT:    v_writelane_b32 v40, s31, 1
+; FLATSCR-NEXT:    s_getpc_b64 s[0:1]
+; FLATSCR-NEXT:    s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT:    s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[0:1]
+; FLATSCR-NEXT:    v_readlane_b32 s31, v40, 1
+; FLATSCR-NEXT:    v_readlane_b32 s30, v40, 0
+; FLATSCR-NEXT:    s_mov_b32 s32, s33
+; FLATSCR-NEXT:    v_readlane_b32 s0, v40, 2
+; FLATSCR-NEXT:    s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT:    scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT:    s_mov_b32 s33, s0
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
   call void @external_void_func_void()
   ret void
 }
@@ -168,26 +246,306 @@ declare hidden void @external_void_func_void() #0
 
 ; Make sure if a CSR vgpr is used for SGPR spilling, it is saved and
 ; restored. No FP is required.
-;
-; GCN-LABEL: {{^}}callee_func_sgpr_spill_no_calls:
-; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT:   buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 ; 4-byte Folded Spill
-; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 ; 4-byte Folded Spill
-; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
-; GCN: v_writelane_b32 [[CSR_VGPR]], s
-; GCN: v_writelane_b32 [[CSR_VGPR]], s
-
-; GCN: ;;#ASMSTART
-; GCN: v_readlane_b32 s{{[0-9]+}}, [[CSR_VGPR]]
-; GCN: v_readlane_b32 s{{[0-9]+}}, [[CSR_VGPR]]
-
-; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT:   buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 ; 4-byte Folded Reload
-; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
 define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
+; MUBUF-LABEL: callee_func_sgpr_spill_no_calls:
+; MUBUF:       ; %bb.0:
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; MUBUF-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
+; MUBUF-NEXT:    s_mov_b64 exec, s[4:5]
+; MUBUF-NEXT:    v_writelane_b32 v40, s36, 0
+; MUBUF-NEXT:    v_writelane_b32 v40, s37, 1
+; MUBUF-NEXT:    v_writelane_b32 v40, s38, 2
+; MUBUF-NEXT:    v_writelane_b32 v40, s39, 3
+; MUBUF-NEXT:    v_writelane_b32 v40, s40, 4
+; MUBUF-NEXT:    v_writelane_b32 v40, s41, 5
+; MUBUF-NEXT:    v_writelane_b32 v40, s42, 6
+; MUBUF-NEXT:    v_writelane_b32 v40, s43, 7
+; MUBUF-NEXT:    v_writelane_b32 v40, s44, 8
+; MUBUF-NEXT:    v_writelane_b32 v40, s45, 9
+; MUBUF-NEXT:    v_writelane_b32 v40, s46, 10
+; MUBUF-NEXT:    v_writelane_b32 v40, s47, 11
+; MUBUF-NEXT:    v_writelane_b32 v40, s48, 12
+; MUBUF-NEXT:    v_writelane_b32 v40, s49, 13
+; MUBUF-NEXT:    v_writelane_b32 v40, s50, 14
+; MUBUF-NEXT:    v_writelane_b32 v40, s51, 15
+; MUBUF-NEXT:    v_writelane_b32 v40, s52, 16
+; MUBUF-NEXT:    v_writelane_b32 v40, s53, 17
+; MUBUF-NEXT:    v_writelane_b32 v40, s54, 18
+; MUBUF-NEXT:    v_writelane_b32 v40, s55, 19
+; MUBUF-NEXT:    v_writelane_b32 v40, s56, 20
+; MUBUF-NEXT:    v_writelane_b32 v40, s57, 21
+; MUBUF-NEXT:    v_writelane_b32 v40, s58, 22
+; MUBUF-NEXT:    v_writelane_b32 v40, s59, 23
+; MUBUF-NEXT:    v_writelane_b32 v40, s60, 24
+; MUBUF-NEXT:    v_writelane_b32 v40, s61, 25
+; MUBUF-NEXT:    v_writelane_b32 v40, s62, 26
+; MUBUF-NEXT:    v_writelane_b32 v40, s63, 27
+; MUBUF-NEXT:    v_writelane_b32 v40, s64, 28
+; MUBUF-NEXT:    v_writelane_b32 v40, s65, 29
+; MUBUF-NEXT:    v_writelane_b32 v40, s66, 30
+; MUBUF-NEXT:    v_writelane_b32 v40, s67, 31
+; MUBUF-NEXT:    v_writelane_b32 v40, s68, 32
+; MUBUF-NEXT:    v_writelane_b32 v40, s69, 33
+; MUBUF-NEXT:    v_writelane_b32 v40, s70, 34
+; MUBUF-NEXT:    v_writelane_b32 v40, s71, 35
+; MUBUF-NEXT:    v_writelane_b32 v40, s72, 36
+; MUBUF-NEXT:    v_writelane_b32 v40, s73, 37
+; MUBUF-NEXT:    v_writelane_b32 v40, s74, 38
+; MUBUF-NEXT:    v_writelane_b32 v40, s75, 39
+; MUBUF-NEXT:    v_writelane_b32 v40, s76, 40
+; MUBUF-NEXT:    v_writelane_b32 v40, s77, 41
+; MUBUF-NEXT:    v_writelane_b32 v40, s78, 42
+; MUBUF-NEXT:    v_writelane_b32 v40, s79, 43
+; MUBUF-NEXT:    v_writelane_b32 v40, s80, 44
+; MUBUF-NEXT:    v_writelane_b32 v40, s81, 45
+; MUBUF-NEXT:    v_writelane_b32 v40, s82, 46
+; MUBUF-NEXT:    v_writelane_b32 v40, s83, 47
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; def s[68:83]
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; def s[52:67]
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; def s[36:51]
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; def s[4:19]
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; def s[20:27]
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; def s[28:29]
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; use s[68:83]
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; use s[52:67]
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; use s[36:51]
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; use s[20:27]
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; use s[28:29]
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; use s[4:19]
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    v_readlane_b32 s83, v40, 47
+; MUBUF-NEXT:    v_readlane_b32 s82, v40, 46
+; MUBUF-NEXT:    v_readlane_b32 s81, v40, 45
+; MUBUF-NEXT:    v_readlane_b32 s80, v40, 44
+; MUBUF-NEXT:    v_readlane_b32 s79, v40, 43
+; MUBUF-NEXT:    v_readlane_b32 s78, v40, 42
+; MUBUF-NEXT:    v_readlane_b32 s77, v40, 41
+; ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/124507


More information about the llvm-commits mailing list