[llvm] [NFC][AMDGPU] Auto generate check lines for three test cases (PR #127352)

via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 15 15:12:24 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Shilei Tian (shiltian)

<details>
<summary>Changes</summary>

- `CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll`
- `CodeGen/AMDGPU/call-preserved-registers.ll`
- `CodeGen/AMDGPU/stack-realign.ll`

This is in preparation for another PR.

---

Patch is 92.52 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127352.diff


3 Files Affected:

- (modified) llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll (+605-218) 
- (modified) llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll (+306-13) 
- (modified) llvm/test/CodeGen/AMDGPU/stack-realign.ll (+658-155) 


``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
index ff80e05197b0d..db9ce56ecc3cc 100644
--- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MUBUF %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MUBUF %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MUBUF %s
@@ -5,110 +6,258 @@
 
 declare hidden void @external_void_func_void() #3
 
-; GCN-LABEL: {{^}}test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
-; GCN: s_getpc_b64 s[34:35]
-; GCN-NEXT: s_add_u32 s34, s34,
-; GCN-NEXT: s_addc_u32 s35, s35,
-; GCN: s_swappc_b64 s[30:31], s[34:35]
-
-; GCN-NEXT: #ASMSTART
-; GCN-NEXT: #ASMEND
-; GCN-NEXT: s_swappc_b64 s[30:31], s[34:35]
 define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
+; FLATSCR-LABEL: test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; FLATSCR-NEXT:    s_getpc_b64 s[34:35]
+; FLATSCR-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT:    s_mov_b32 s32, 0
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; FLATSCR-NEXT:    s_endpgm
   call void @external_void_func_void()
   call void asm sideeffect "", ""() #0
   call void @external_void_func_void()
   ret void
 }
 
-; GCN-LABEL: {{^}}test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
-; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
-; MUBUF:   buffer_store_dword
-; FLATSCR: scratch_store_dword
-; GCN: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 4
-; GCN: v_writelane_b32 v40, s30, 0
-; GCN: v_writelane_b32 v40, s31, 1
-; GCN: v_writelane_b32 v40, s34, 2
-; GCN: v_writelane_b32 v40, s35, 3
-
-; GCN: s_swappc_b64
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_swappc_b64
-; GCN: v_readlane_b32 s35, v40, 3
-; GCN: v_readlane_b32 s34, v40, 2
-; MUBUF-DAG:   v_readlane_b32 s31, v40, 1
-; MUBUF-DAG:   v_readlane_b32 s30, v40, 0
-; FLATSCR-DAG: v_readlane_b32 s31, v40, 1
-; FLATSCR-DAG: v_readlane_b32 s30, v40, 0
-
-; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 4
-; MUBUF:   buffer_load_dword
-; FLATSCR: scratch_load_dword
-; GCN: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
-; GCN: s_setpc_b64 s[30:31]
 define void @test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
+; MUBUF-LABEL: test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
+; MUBUF:       ; %bb.0:
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    s_mov_b32 s4, s33
+; MUBUF-NEXT:    s_mov_b32 s33, s32
+; MUBUF-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; MUBUF-NEXT:    s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT:    v_writelane_b32 v40, s4, 4
+; MUBUF-NEXT:    v_writelane_b32 v40, s30, 0
+; MUBUF-NEXT:    v_writelane_b32 v40, s31, 1
+; MUBUF-NEXT:    s_addk_i32 s32, 0x400
+; MUBUF-NEXT:    v_writelane_b32 v40, s34, 2
+; MUBUF-NEXT:    v_writelane_b32 v40, s35, 3
+; MUBUF-NEXT:    s_getpc_b64 s[34:35]
+; MUBUF-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
+; MUBUF-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
+; MUBUF-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; MUBUF-NEXT:    v_readlane_b32 s35, v40, 3
+; MUBUF-NEXT:    v_readlane_b32 s34, v40, 2
+; MUBUF-NEXT:    v_readlane_b32 s31, v40, 1
+; MUBUF-NEXT:    v_readlane_b32 s30, v40, 0
+; MUBUF-NEXT:    s_mov_b32 s32, s33
+; MUBUF-NEXT:    v_readlane_b32 s4, v40, 4
+; MUBUF-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT:    s_mov_b32 s33, s4
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    s_mov_b32 s0, s33
+; FLATSCR-NEXT:    s_mov_b32 s33, s32
+; FLATSCR-NEXT:    s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT:    scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
+; FLATSCR-NEXT:    s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT:    v_writelane_b32 v40, s0, 4
+; FLATSCR-NEXT:    v_writelane_b32 v40, s30, 0
+; FLATSCR-NEXT:    v_writelane_b32 v40, s31, 1
+; FLATSCR-NEXT:    s_add_i32 s32, s32, 16
+; FLATSCR-NEXT:    v_writelane_b32 v40, s34, 2
+; FLATSCR-NEXT:    v_writelane_b32 v40, s35, 3
+; FLATSCR-NEXT:    s_getpc_b64 s[34:35]
+; FLATSCR-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; FLATSCR-NEXT:    v_readlane_b32 s35, v40, 3
+; FLATSCR-NEXT:    v_readlane_b32 s34, v40, 2
+; FLATSCR-NEXT:    v_readlane_b32 s31, v40, 1
+; FLATSCR-NEXT:    v_readlane_b32 s30, v40, 0
+; FLATSCR-NEXT:    s_mov_b32 s32, s33
+; FLATSCR-NEXT:    v_readlane_b32 s0, v40, 4
+; FLATSCR-NEXT:    s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT:    scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT:    s_mov_b32 s33, s0
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
   call void @external_void_func_void()
   call void asm sideeffect "", ""() #0
   call void @external_void_func_void()
   ret void
 }
 
-; GCN-LABEL: {{^}}test_func_call_external_void_funcx2:
-; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
-; GCN: s_mov_b32 s33, s32
-; MUBUF:   buffer_store_dword v40
-; FLATSCR: scratch_store_dword off, v40
-; GCN: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 4
-; MUBUF:   s_addk_i32 s32, 0x400
-; FLATSCR: s_add_i32 s32, s32, 16
-
-; GCN: s_swappc_b64
-; GCN-NEXT: s_swappc_b64
-
-; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 4
-; MUBUF:   buffer_load_dword v40
-; FLATSCR: scratch_load_dword v40
-; GCN: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
 define void @test_func_call_external_void_funcx2() #0 {
+; MUBUF-LABEL: test_func_call_external_void_funcx2:
+; MUBUF:       ; %bb.0:
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    s_mov_b32 s4, s33
+; MUBUF-NEXT:    s_mov_b32 s33, s32
+; MUBUF-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; MUBUF-NEXT:    s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT:    v_writelane_b32 v40, s4, 4
+; MUBUF-NEXT:    v_writelane_b32 v40, s30, 0
+; MUBUF-NEXT:    v_writelane_b32 v40, s31, 1
+; MUBUF-NEXT:    s_addk_i32 s32, 0x400
+; MUBUF-NEXT:    v_writelane_b32 v40, s34, 2
+; MUBUF-NEXT:    v_writelane_b32 v40, s35, 3
+; MUBUF-NEXT:    s_getpc_b64 s[34:35]
+; MUBUF-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
+; MUBUF-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
+; MUBUF-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; MUBUF-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; MUBUF-NEXT:    v_readlane_b32 s35, v40, 3
+; MUBUF-NEXT:    v_readlane_b32 s34, v40, 2
+; MUBUF-NEXT:    v_readlane_b32 s31, v40, 1
+; MUBUF-NEXT:    v_readlane_b32 s30, v40, 0
+; MUBUF-NEXT:    s_mov_b32 s32, s33
+; MUBUF-NEXT:    v_readlane_b32 s4, v40, 4
+; MUBUF-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT:    s_mov_b32 s33, s4
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: test_func_call_external_void_funcx2:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    s_mov_b32 s0, s33
+; FLATSCR-NEXT:    s_mov_b32 s33, s32
+; FLATSCR-NEXT:    s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT:    scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
+; FLATSCR-NEXT:    s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT:    v_writelane_b32 v40, s0, 4
+; FLATSCR-NEXT:    v_writelane_b32 v40, s30, 0
+; FLATSCR-NEXT:    v_writelane_b32 v40, s31, 1
+; FLATSCR-NEXT:    s_add_i32 s32, s32, 16
+; FLATSCR-NEXT:    v_writelane_b32 v40, s34, 2
+; FLATSCR-NEXT:    v_writelane_b32 v40, s35, 3
+; FLATSCR-NEXT:    s_getpc_b64 s[34:35]
+; FLATSCR-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; FLATSCR-NEXT:    v_readlane_b32 s35, v40, 3
+; FLATSCR-NEXT:    v_readlane_b32 s34, v40, 2
+; FLATSCR-NEXT:    v_readlane_b32 s31, v40, 1
+; FLATSCR-NEXT:    v_readlane_b32 s30, v40, 0
+; FLATSCR-NEXT:    s_mov_b32 s32, s33
+; FLATSCR-NEXT:    v_readlane_b32 s0, v40, 4
+; FLATSCR-NEXT:    s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT:    scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT:    s_mov_b32 s33, s0
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
   call void @external_void_func_void()
   call void @external_void_func_void()
   ret void
 }
 
-; GCN-LABEL: {{^}}void_func_void_clobber_s30_s31:
-; GCN: s_waitcnt
-; GCN: v_writelane_b32 v0, s30, 0
-; GCN: v_writelane_b32 v0, s31, 1
-; GCN-NEXT: #ASMSTART
-; GCN: ; clobber
-; GCN-NEXT: #ASMEND
-; GCN: v_readlane_b32 s31, v0, 1
-; GCN: v_readlane_b32 s30, v0, 0
-; GCN: s_setpc_b64 s[30:31]
 define void @void_func_void_clobber_s30_s31() #2 {
+; MUBUF-LABEL: void_func_void_clobber_s30_s31:
+; MUBUF:       ; %bb.0:
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; MUBUF-NEXT:    s_mov_b64 exec, s[4:5]
+; MUBUF-NEXT:    v_writelane_b32 v0, s30, 0
+; MUBUF-NEXT:    v_writelane_b32 v0, s31, 1
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; clobber
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    v_readlane_b32 s31, v0, 1
+; MUBUF-NEXT:    v_readlane_b32 s30, v0, 0
+; MUBUF-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_mov_b64 exec, s[4:5]
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: void_func_void_clobber_s30_s31:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    s_xor_saveexec_b64 s[0:1], -1
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s32 ; 4-byte Folded Spill
+; FLATSCR-NEXT:    s_mov_b64 exec, s[0:1]
+; FLATSCR-NEXT:    v_writelane_b32 v0, s30, 0
+; FLATSCR-NEXT:    v_writelane_b32 v0, s31, 1
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; clobber
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    v_readlane_b32 s31, v0, 1
+; FLATSCR-NEXT:    v_readlane_b32 s30, v0, 0
+; FLATSCR-NEXT:    s_xor_saveexec_b64 s[0:1], -1
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s32 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_mov_b64 exec, s[0:1]
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
   call void asm sideeffect "; clobber", "~{s[30:31]}"() #0
   ret void
 }
 
-; GCN-LABEL: {{^}}void_func_void_clobber_vcc:
-; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_setpc_b64 s[30:31]
 define hidden void @void_func_void_clobber_vcc() #2 {
+; GCN-LABEL: void_func_void_clobber_vcc:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   call void asm sideeffect "", "~{vcc}"() #0
   ret void
 }
 
-; GCN-LABEL: {{^}}test_call_void_func_void_clobber_vcc:
-; GCN: s_getpc_b64
-; GCN-NEXT: s_add_u32
-; GCN-NEXT: s_addc_u32
-; GCN: s_mov_b64 s[34:35], vcc
-; GCN-NEXT: s_swappc_b64
-; GCN: s_mov_b64 vcc, s[34:35]
 define amdgpu_kernel void @test_call_void_func_void_clobber_vcc(ptr addrspace(1) %out) #0 {
+; FLATSCR-LABEL: test_call_void_func_void_clobber_vcc:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; FLATSCR-NEXT:    s_add_u32 s8, s4, 8
+; FLATSCR-NEXT:    s_addc_u32 s9, s5, 0
+; FLATSCR-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; FLATSCR-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; FLATSCR-NEXT:    s_mov_b32 s14, s12
+; FLATSCR-NEXT:    s_mov_b32 s13, s11
+; FLATSCR-NEXT:    s_mov_b32 s12, s10
+; FLATSCR-NEXT:    s_mov_b64 s[10:11], s[6:7]
+; FLATSCR-NEXT:    s_getpc_b64 s[16:17]
+; FLATSCR-NEXT:    s_add_u32 s16, s16, void_func_void_clobber_vcc@rel32@lo+4
+; FLATSCR-NEXT:    s_addc_u32 s17, s17, void_func_void_clobber_vcc@rel32@hi+12
+; FLATSCR-NEXT:    v_or3_b32 v31, v0, v1, v2
+; FLATSCR-NEXT:    s_mov_b64 s[4:5], s[0:1]
+; FLATSCR-NEXT:    s_mov_b64 s[6:7], s[2:3]
+; FLATSCR-NEXT:    s_mov_b32 s32, 0
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; def vcc
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_mov_b64 s[34:35], vcc
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; FLATSCR-NEXT:    global_load_dword v0, v[0:1], off glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_mov_b64 vcc, s[34:35]
+; FLATSCR-NEXT:    global_load_dword v0, v[0:1], off glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    ; kill: killed $vgpr0_vgpr1
+; FLATSCR-NEXT:    ; kill: killed $vgpr0_vgpr1
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; use vcc
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_endpgm
   %vcc = call i64 asm sideeffect "; def $0", "={vcc}"()
   call void @void_func_void_clobber_vcc()
   %val0 = load volatile i32, ptr addrspace(1) undef
@@ -117,22 +266,50 @@ define amdgpu_kernel void @test_call_void_func_void_clobber_vcc(ptr addrspace(1)
   ret void
 }
 
-; GCN-LABEL: {{^}}test_call_void_func_void_mayclobber_s31:
-; GCN: s_mov_b32 s33, s31
-; GCN: s_swappc_b64
-; GCN-NEXT: s_mov_b32 s31, s33
 define amdgpu_kernel void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) %out) #0 {
+; FLATSCR-LABEL: test_call_void_func_void_mayclobber_s31:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; FLATSCR-NEXT:    s_getpc_b64 s[0:1]
+; FLATSCR-NEXT:    s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT:    s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT:    s_mov_b32 s32, 0
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; def s31
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_mov_b32 s33, s31
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[0:1]
+; FLATSCR-NEXT:    s_mov_b32 s31, s33
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; use s31
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_endpgm
   %s31 = call i32 asm sideeffect "; def $0", "={s31}"()
   call void @external_void_func_void()
   call void asm sideeffect "; use $0", "{s31}"(i32 %s31)
   ret void
 }
 
-; GCN-LABEL: {{^}}test_call_void_func_void_mayclobber_v31:
-; GCN: v_mov_b32_e32 v40, v31
-; GCN: s_swappc_b64
-; GCN-NEXT: v_mov_b32_e32 v31, v40
 define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) %out) #0 {
+; FLATSCR-LABEL: test_call_void_func_void_mayclobber_v31:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; FLATSCR-NEXT:    s_getpc_b64 s[0:1]
+; FLATSCR-NEXT:    s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT:    s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT:    s_mov_b32 s32, 0
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; def v31
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    v_mov_b32_e32 v40, v31
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[0:1]
+; FLATSCR-NEXT:    v_mov_b32_e32 v31, v40
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; use v31
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_endpgm
   %v31 = call i32 asm sideeffect "; def $0", "={v31}"()
   call void @external_void_func_void()
   call void asm sideeffect "; use $0", "{v31}"(i32 %v31)
@@ -140,175 +317,294 @@ define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(ptr addrspace
 }
 
 ; FIXME: What is the expected behavior for reserved registers here?
-
-; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33:
-; FLATSCR:      s_getpc_b64 s[0:1]
-; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
-; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
-; MUBUF:        s_getpc_b64 s[4:5]
-; MUBUF-NEXT:   s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
-; MUBUF-NEXT:   s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
-
-; GCN: #ASMSTART
-; GCN-NEXT: ; def s33
-; GCN-NEXT: #ASMEND
-
-; GCN-NOT: s33
-
-; FLATSCR: s_swappc_b64 s[30:31], s[0:1]
-; MUBUF:   s_swappc_b64 s[30:31], s[4:5]
-
-; GCN-NOT: s33
-
-; GCN: ;;#ASMSTART
-; GCN-NEXT: ; use s33
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_endpgm
 define amdgpu_kernel void @test_call_void_func_void_preserves_s33(ptr addrspace(1) %out) #0 {
+; FLATSCR-LABEL: test_call_void_func_void_preserves_s33:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; FLATSCR-NEXT:    s_getpc_b64 s[0:1]
+; FLATSCR-NEXT:    s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT:    s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT:    s_mov_b32 s32, 0
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; def s33
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[0:1]
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; use s33
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_endpgm
   %s33 = call i32 asm sideeffect "; def $0", "={s33}"()
   call void @external_void_func_void()
   call void asm sideeffect "; use $0", "{s33}"(i32 %s33)
   ret void
 }
 
-; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s34: {{.*}}
-; GCN-NOT: s34
-
-; FLATSCR:      s_getpc_b64 s[0:1]
-; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
-; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
-; MUBUF:        s_getpc_b64 s[4:5]
-; MUBUF-NEXT:   s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
-; MUBUF-NEXT:   s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
-; GCN: s_mov_b32 s32, 0
-
-; GCN: ;;#ASMSTART
-; GCN-NEXT: ; def s34
-; GCN-NEXT: ;;#ASMEND
-
-; GCN-NOT: s34
-
-; MUBUF:   s_swappc_b64 s[30:31], s[4:5]
-; FLATSCR: s_swappc_b64 s[30:31], s[0:1]
-
-; GCN-NOT: s34
-
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s34
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_endpgm
 define amdgpu_kernel void @test_call_void_func_void_preserves_s34(ptr addrspace(1) %out) #0 {
+; FLATSCR-LABEL: test_call_void_func_void_preserves_s34:
+; FLATSCR:       ; %bb.0:
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; FLATSCR-NEXT:    s_getpc_b64 s[0:1]
+; FLATSCR-NEXT:    s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT:    s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT:    s_mov_b32 s32, 0
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; def s34
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/127352


More information about the llvm-commits mailing list