[llvm] [AMDGPU] whole wave CSR tests on gfx1250. NFC. (PR #157166)

via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 5 12:27:51 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

<details>
<summary>Changes</summary>



---

Patch is 409.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/157166.diff


1 File Affected:

- (modified) llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll (+4036-1269) 


``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
index f7af06948ec41..7a985456379b8 100644
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
@@ -3,6 +3,7 @@
 ; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck --check-prefix=GISEL %s
 ; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=DAGISEL64 %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GISEL64 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1250 < %s | FileCheck --check-prefix=GFX1250-DAGISEL %s
 
 ; Make sure the i1 %active is passed through EXEC.
 ; The EXEC mask should be set to -1 for the duration of the function
@@ -106,6 +107,28 @@ define amdgpu_gfx_whole_wave i32 @basic_test(i1 %active, i32 %a, i32 %b) {
 ; GISEL64-NEXT:    s_mov_b64 exec, vcc
 ; GISEL64-NEXT:    s_wait_loadcnt 0x0
 ; GISEL64-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-DAGISEL-LABEL: basic_test:
+; GFX1250-DAGISEL:       ; %bb.0:
+; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_xor_saveexec_b32 vcc_lo, -1
+; GFX1250-DAGISEL-NEXT:    s_clause 0x1
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
+; GFX1250-DAGISEL-NEXT:    v_dual_cndmask_b32 v0, 5, v0 :: v_dual_cndmask_b32 v1, 3, v1
+; GFX1250-DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-DAGISEL-NEXT:    v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+; GFX1250-DAGISEL-NEXT:    s_xor_b32 exec_lo, vcc_lo, -1
+; GFX1250-DAGISEL-NEXT:    s_clause 0x1
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, vcc_lo
+; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_set_pc_i64 s[30:31]
   %x = select i1 %active, i32 %a, i32 5
   %y = select i1 %active, i32 %b, i32 3
   %ret = call i32 @llvm.amdgcn.update.dpp.i32(i32 %x, i32 %y, i32 1, i32 1, i32 1, i1 false)
@@ -209,6 +232,28 @@ define amdgpu_gfx_whole_wave i32 @single_use_of_active(i1 %active, i32 %a, i32 %
 ; GISEL64-NEXT:    s_mov_b64 exec, vcc
 ; GISEL64-NEXT:    s_wait_loadcnt 0x0
 ; GISEL64-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-DAGISEL-LABEL: single_use_of_active:
+; GFX1250-DAGISEL:       ; %bb.0:
+; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_xor_saveexec_b32 vcc_lo, -1
+; GFX1250-DAGISEL-NEXT:    s_clause 0x1
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
+; GFX1250-DAGISEL-NEXT:    v_cndmask_b32_e32 v1, 17, v1, vcc_lo
+; GFX1250-DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-DAGISEL-NEXT:    v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+; GFX1250-DAGISEL-NEXT:    s_xor_b32 exec_lo, vcc_lo, -1
+; GFX1250-DAGISEL-NEXT:    s_clause 0x1
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, vcc_lo
+; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_set_pc_i64 s[30:31]
   %y = select i1 %active, i32 %b, i32 17
   %ret = call i32 @llvm.amdgcn.update.dpp.i32(i32 %a, i32 %y, i32 1, i32 1, i32 1, i1 false)
   ret i32 %ret
@@ -287,6 +332,22 @@ define amdgpu_gfx_whole_wave i32 @unused_active(i1 %active, i32 %a, i32 %b) {
 ; GISEL64-NEXT:    s_mov_b64 exec, s[0:1]
 ; GISEL64-NEXT:    s_wait_loadcnt 0x0
 ; GISEL64-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-DAGISEL-LABEL: unused_active:
+; GFX1250-DAGISEL:       ; %bb.0:
+; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_xor_saveexec_b32 s0, -1
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32 scope:SCOPE_SE ; 4-byte Folded Spill
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
+; GFX1250-DAGISEL-NEXT:    v_mov_b32_e32 v0, 14
+; GFX1250-DAGISEL-NEXT:    s_xor_b32 exec_lo, s0, -1
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, s0
+; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_set_pc_i64 s[30:31]
   ret i32 14
 }
 
@@ -450,6 +511,44 @@ define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) {
 ; GISEL64-NEXT:    s_wait_loadcnt 0x0
 ; GISEL64-NEXT:    s_wait_alu 0xf1ff
 ; GISEL64-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-DAGISEL-LABEL: csr:
+; GFX1250-DAGISEL:       ; %bb.0:
+; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_xor_saveexec_b32 vcc_lo, -1
+; GFX1250-DAGISEL-NEXT:    s_clause 0x3
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2, s32 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32 offset:4 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:8 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49, s32 offset:16 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40, s32 offset:12 scope:SCOPE_SE ; 4-byte Folded Spill
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    ;;#ASMSTART
+; GFX1250-DAGISEL-NEXT:    ; clobber CSR
+; GFX1250-DAGISEL-NEXT:    ;;#ASMEND
+; GFX1250-DAGISEL-NEXT:    v_writelane_b32 v2, s20, 0
+; GFX1250-DAGISEL-NEXT:    ;;#ASMSTART
+; GFX1250-DAGISEL-NEXT:    ; clobber non-CSR
+; GFX1250-DAGISEL-NEXT:    ;;#ASMEND
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40, off, s32 offset:12 ; 4-byte Folded Reload
+; GFX1250-DAGISEL-NEXT:    v_dual_cndmask_b32 v0, 5, v0 :: v_dual_cndmask_b32 v1, 3, v1
+; GFX1250-DAGISEL-NEXT:    v_readlane_b32 s20, v2, 0
+; GFX1250-DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX1250-DAGISEL-NEXT:    v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_xor_b32 exec_lo, vcc_lo, -1
+; GFX1250-DAGISEL-NEXT:    s_clause 0x3
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2, off, s32
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32 offset:4
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:8
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49, off, s32 offset:16
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, vcc_lo
+; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_set_pc_i64 s[30:31]
   %x = select i1 %active, i32 %a, i32 5
   %y = select i1 %active, i32 %b, i32 3
   call void asm sideeffect "; clobber CSR", "~{v40},~{s48}"()
@@ -531,6 +630,22 @@ define amdgpu_gfx_whole_wave void @csr_vgpr_only(i1 %active, i32 %a, i32 %b) {
 ; GISEL64-NEXT:    s_mov_b64 exec, s[0:1]
 ; GISEL64-NEXT:    s_wait_loadcnt 0x0
 ; GISEL64-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-DAGISEL-LABEL: csr_vgpr_only:
+; GFX1250-DAGISEL:       ; %bb.0:
+; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_or_saveexec_b32 s0, -1
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40, s32 scope:SCOPE_SE ; 4-byte Folded Spill
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    ;;#ASMSTART
+; GFX1250-DAGISEL-NEXT:    ; clobber CSR VGPR
+; GFX1250-DAGISEL-NEXT:    ;;#ASMEND
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, s0
+; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_set_pc_i64 s[30:31]
   call void asm sideeffect "; clobber CSR VGPR", "~{v40}"()
   ret void
 }
@@ -627,6 +742,27 @@ define amdgpu_gfx_whole_wave void @sgpr_spill_only(i1 %active, i32 %a, i32 %b) {
 ; GISEL64-NEXT:    s_mov_b64 exec, s[0:1]
 ; GISEL64-NEXT:    s_wait_loadcnt 0x0
 ; GISEL64-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-DAGISEL-LABEL: sgpr_spill_only:
+; GFX1250-DAGISEL:       ; %bb.0:
+; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_xor_saveexec_b32 s0, -1
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32 scope:SCOPE_SE ; 4-byte Folded Spill
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
+; GFX1250-DAGISEL-NEXT:    v_writelane_b32 v0, s68, 0
+; GFX1250-DAGISEL-NEXT:    ;;#ASMSTART
+; GFX1250-DAGISEL-NEXT:    ; clobber CSR SGPR
+; GFX1250-DAGISEL-NEXT:    ;;#ASMEND
+; GFX1250-DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-DAGISEL-NEXT:    v_readlane_b32 s68, v0, 0
+; GFX1250-DAGISEL-NEXT:    s_xor_b32 exec_lo, s0, -1
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, s0
+; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_set_pc_i64 s[30:31]
   call void asm sideeffect "; clobber CSR SGPR", "~{s68}"()
   ret void
 }
@@ -751,6 +887,34 @@ define amdgpu_gfx_whole_wave i32 @multiple_blocks(i1 %active, i32 %a, i32 %b) {
 ; GISEL64-NEXT:    s_mov_b64 exec, vcc
 ; GISEL64-NEXT:    s_wait_loadcnt 0x0
 ; GISEL64-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-DAGISEL-LABEL: multiple_blocks:
+; GFX1250-DAGISEL:       ; %bb.0:
+; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_xor_saveexec_b32 vcc_lo, -1
+; GFX1250-DAGISEL-NEXT:    s_clause 0x1
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
+; GFX1250-DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-DAGISEL-NEXT:    s_mov_b32 s1, exec_lo
+; GFX1250-DAGISEL-NEXT:    v_cmpx_eq_u32_e64 v0, v1
+; GFX1250-DAGISEL-NEXT:  ; %bb.1: ; %if.then
+; GFX1250-DAGISEL-NEXT:    v_add_nc_u32_e32 v1, v0, v1
+; GFX1250-DAGISEL-NEXT:  ; %bb.2: ; %if.end
+; GFX1250-DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
+; GFX1250-DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-DAGISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1250-DAGISEL-NEXT:    s_xor_b32 exec_lo, vcc_lo, -1
+; GFX1250-DAGISEL-NEXT:    s_clause 0x1
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, vcc_lo
+; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_set_pc_i64 s[30:31]
   %c = icmp eq i32 %a, %b
   br i1 %c, label %if.then, label %if.end
 
@@ -888,6 +1052,34 @@ define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) {
 ; GISEL64-NEXT:    s_mov_b64 exec, vcc
 ; GISEL64-NEXT:    s_wait_loadcnt 0x0
 ; GISEL64-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-DAGISEL-LABEL: ret_64:
+; GFX1250-DAGISEL:       ; %bb.0:
+; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_xor_saveexec_b32 vcc_lo, -1
+; GFX1250-DAGISEL-NEXT:    s_clause 0x3
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2, s32 offset:8 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3, s32 offset:12 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
+; GFX1250-DAGISEL-NEXT:    v_dual_cndmask_b32 v1, 0, v1 :: v_dual_cndmask_b32 v0, 5, v0
+; GFX1250-DAGISEL-NEXT:    v_dual_cndmask_b32 v2, 3, v2 :: v_dual_cndmask_b32 v3, 0, v3
+; GFX1250-DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250-DAGISEL-NEXT:    v_mov_b32_dpp v0, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+; GFX1250-DAGISEL-NEXT:    v_mov_b32_dpp v1, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+; GFX1250-DAGISEL-NEXT:    s_xor_b32 exec_lo, vcc_lo, -1
+; GFX1250-DAGISEL-NEXT:    s_clause 0x3
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2, off, s32 offset:8
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3, off, s32 offset:12
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, vcc_lo
+; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_set_pc_i64 s[30:31]
   %x = select i1 %active, i64 %a, i64 5
   %y = select i1 %active, i64 %b, i64 3
   %ret = call i64 @llvm.amdgcn.update.dpp.i64(i64 %x, i64 %y, i32 1, i32 1, i32 1, i1 false)
@@ -1053,6 +1245,41 @@ define amdgpu_gfx_whole_wave void @inreg_args(i1 %active, i32 inreg %i32, <4 x i
 ; GISEL64-NEXT:    s_mov_b64 exec, s[34:35]
 ; GISEL64-NEXT:    s_wait_loadcnt 0x0
 ; GISEL64-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-DAGISEL-LABEL: inreg_args:
+; GFX1250-DAGISEL:       ; %bb.0:
+; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_xor_saveexec_b32 s0, -1
+; GFX1250-DAGISEL-NEXT:    s_clause 0x5
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2, s32 offset:8 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3, s32 offset:12 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4, s32 offset:16 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5, s32 offset:20 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
+; GFX1250-DAGISEL-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s9
+; GFX1250-DAGISEL-NEXT:    v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6
+; GFX1250-DAGISEL-NEXT:    v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4, s10 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    s_clause 0x1
+; GFX1250-DAGISEL-NEXT:    scratch_store_b128 off, v[0:3], s11 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5, s11 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_xor_b32 exec_lo, s0, -1
+; GFX1250-DAGISEL-NEXT:    s_clause 0x5
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2, off, s32 offset:8
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3, off, s32 offset:12
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4, off, s32 offset:16
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5, off, s32 offset:20
+; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, s0
+; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-DAGISEL-NEXT:    s_set_pc_i64 s[30:31]
   store i32 %i32, ptr addrspace(5) %ptr
   store <4 x i32> %v4i32, ptr addrspace(5) %ptr2
   store float %float, ptr addrspace(5) %ptr2
@@ -2409,1276 +2636,1904 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2
 ; GISEL64-NEXT:    s_wait_loadcnt 0x0
 ; GISEL64-NEXT:    s_wait_alu 0xfffe
 ; GISEL64-NEXT:    s_setpc_b64 s[30:31]
-  %ret = call amdgpu_gfx <2 x half>(<2 x half>, <2 x half>) @gfx_callee(<2 x half> %y, <2 x half> %x) convergent
-  ret <2 x half> %ret
-}
-
-define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %active, <2 x half> %x, <2 x half> %y) {
-  ; This should not be turned into a tail call.
-; DAGISEL-LABEL: tail_call_gfx_from_whole_wave:
-; DAGISEL:       ; %bb.0:
-; DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
-; DAGISEL-NEXT:    s_wait_expcnt 0x0
-; DAGISEL-NEXT:    s_wait_samplecnt 0x0
-; DAGISEL-NEXT:    s_wait_bvhcnt 0x0
-; DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; DAGISEL-NEXT:    s_xor_saveexec_b32 s0, -1
-; DAGISEL-NEXT:    s_clause 0x1f
-; DAGISEL-NEXT:    scratch_store_b32 off, v0, s32
-; DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4
-; DAGISEL-NEXT:    scratch_store_b32 off, v2, s32 offset:8
-; DAGISEL-NEXT:    scratch_store_b32 off, v3, s32 offset:12
-; DAGISEL-NEXT:    scratch_store_b32 off, v4, s32 offset:16
-; DAGISEL-NEXT:    scratch_store_b32 off, v5, s32 offset:20
-; DAGISEL-NEXT:    scratch_store_b32 off, v6, s32 offset:24
-; DAGISEL-NEXT:    scratch_store_b32 off, v7, s32 offset:28
-; DAGISEL-NEXT:    scratch_store_b32 off, v8, s32 offset:32
-; DAGISEL-NEXT:    scratch_store_b32 off, v9, s32 offset:36
-; DAGISEL-NEXT:    scratch_store_b32 off, v10, s32 offset:40
-; DAGISEL-NEXT:    scratch_store_b32 off, v11, s32 offset:44
-; DAGISEL-NEXT:    scratch_store_b32 off, v12, s32 offset:48
-; DAGISEL-NEXT:    scratch_store_b32 off, v13, s32 offset:52
-; DAGISEL-NEXT:    scratch_store_b32 off, v14, s32 offset:56
-; DAGISEL-NEXT:    scratch_store_b32 off, v15, s32 offset:60
-; DAGISEL-NEXT:    scratch_store_b32 off, v16, s32 offset:64
-; DAGISEL-NEXT:    scratch_store_b32 off, v17, s32 offset:68
-; DAGISEL-NEXT:    scratch_store_b32 off, v18, s32 offset:72
-; DAGISEL-NEXT:    scratch_store_b32 off, v19, s32 offset:76
-; DAGISEL-NEXT:    scratch_store_b32 off, v20, s32 offset:80
-; DAGISEL-NEXT:    scratch_store_b32 off, v21, s32 offset:84
-; DAGISEL-NEXT:    scratch_store_b32 off, v22, s32 offset:88
-; DAGISEL-NEXT:    scratch_store_b32 off, v23, s32 offset:92
-; DAGISEL-NEXT:    scratch_store_b32 off, v24, s32 offset:96
-; DAGISEL-NEXT:    scratch_store_b32 off, v25, s32 offset:100
-; DAGISEL-NEXT:    scratch_store_b32 off, v26, s32 offset:104
-; DAGISEL-NEXT:    scratch_store_b32 off, v27, s32 offset:108
-; DAGISEL-NEXT:    scratch_store_b32 off, v28, s32 offset:112
-; DAGISEL-NEXT:    scratch_store_b32 off, v29, s32 offset:116
-; DAGISEL-NEXT:    scratch_store_b32 off, v30, s32 offset:120
-; DAGISEL-NEXT:    scratch_store_b32 off, v31, s32 offset:124
-; DAGISEL-NEXT:    s_clause 0x1f
-; DAGISEL-NEXT:    scratch_store_b32 off, v32, s32 offset:128
-; DAGISEL-NEXT:    scratch_store_b32 off, v33, s32 offset:132
-; DAGISEL-NEXT:    scratch_store_b32 off, v34, s32 offset:136
-; DAGISEL-NEXT:    scratch_store_b32 off, v35, s32 offset:140
-; DAGISEL-NEXT:    scratch_store_b32 off, v36, s32 offset:144
-; DAGISEL-NEXT:    scratch_store_b32 off, v37, s32 offset:148
-; DAGISEL-NEXT:    scratch_store_b32 off, v38, s32 offset:152
-; DAGISEL-NEXT:    scratch_store_b32 off, v39, s32 offset:156
-; DAGISEL-NEXT:    scratch_store_b32 off, v48, s32 offset:160
-; DAGISEL-NEXT:    scratch_store_b32 off, v49, s32 offset:164
-; DAGISEL-NEXT:    scratch_store_b32 off, v50, s32 offset:168
-; DAGISEL-NEXT:    scratch_store_b32 off, v51, s32 offset:172
-; DAGISEL-NEXT:    scratch_store_b32 off, v52, s32 offset:176
-; DAGISEL-NEXT:    scratch_store_b32 off, v53, s32 offset:180
-; DAGISEL-NEXT:    scratch_store_b32 off, v54, s32 offset:184
-; DAGISEL-NEXT:    scratch_store_b32 off, v55, s32 offset:188
-; DAGISEL-NEXT:    scratch_store_b32 off, v64, s32 offset:192
-; DAGISEL-NEXT:    scratch_store_b32 off, v65, s32 offset:196
-; DAGISEL-NEXT:    scratch_store_b32 off, v66, s32 offset:200
-; DAGISEL-NEXT:    scratch_store_b32 off, v67, s32 offset:204
-; DAGISEL-NEXT:    scratch_store_b32 off, v68, s32 offset:208
-; DAGISEL-NEXT:    scratch_store_b32 off, ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/157166


More information about the llvm-commits mailing list