[llvm] [AMDGPU] whole wave CSR tests on gfx1250. NFC. (PR #157166)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 5 12:27:51 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Stanislav Mekhanoshin (rampitec)
<details>
<summary>Changes</summary>
---
Patch is 409.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/157166.diff
1 File Affected:
- (modified) llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll (+4036-1269)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
index f7af06948ec41..7a985456379b8 100644
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
@@ -3,6 +3,7 @@
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck --check-prefix=GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=DAGISEL64 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GISEL64 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1250 < %s | FileCheck --check-prefix=GFX1250-DAGISEL %s
; Make sure the i1 %active is passed through EXEC.
; The EXEC mask should be set to -1 for the duration of the function
@@ -106,6 +107,28 @@ define amdgpu_gfx_whole_wave i32 @basic_test(i1 %active, i32 %a, i32 %b) {
; GISEL64-NEXT: s_mov_b64 exec, vcc
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-DAGISEL-LABEL: basic_test:
+; GFX1250-DAGISEL: ; %bb.0:
+; GFX1250-DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-DAGISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
+; GFX1250-DAGISEL-NEXT: s_clause 0x1
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1
+; GFX1250-DAGISEL-NEXT: v_dual_cndmask_b32 v0, 5, v0 :: v_dual_cndmask_b32 v1, 3, v1
+; GFX1250-DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-DAGISEL-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+; GFX1250-DAGISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
+; GFX1250-DAGISEL-NEXT: s_clause 0x1
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v0, off, s32
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
+; GFX1250-DAGISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_set_pc_i64 s[30:31]
%x = select i1 %active, i32 %a, i32 5
%y = select i1 %active, i32 %b, i32 3
%ret = call i32 @llvm.amdgcn.update.dpp.i32(i32 %x, i32 %y, i32 1, i32 1, i32 1, i1 false)
@@ -209,6 +232,28 @@ define amdgpu_gfx_whole_wave i32 @single_use_of_active(i1 %active, i32 %a, i32 %
; GISEL64-NEXT: s_mov_b64 exec, vcc
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-DAGISEL-LABEL: single_use_of_active:
+; GFX1250-DAGISEL: ; %bb.0:
+; GFX1250-DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-DAGISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
+; GFX1250-DAGISEL-NEXT: s_clause 0x1
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1
+; GFX1250-DAGISEL-NEXT: v_cndmask_b32_e32 v1, 17, v1, vcc_lo
+; GFX1250-DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-DAGISEL-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+; GFX1250-DAGISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
+; GFX1250-DAGISEL-NEXT: s_clause 0x1
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v0, off, s32
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
+; GFX1250-DAGISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_set_pc_i64 s[30:31]
%y = select i1 %active, i32 %b, i32 17
%ret = call i32 @llvm.amdgcn.update.dpp.i32(i32 %a, i32 %y, i32 1, i32 1, i32 1, i1 false)
ret i32 %ret
@@ -287,6 +332,22 @@ define amdgpu_gfx_whole_wave i32 @unused_active(i1 %active, i32 %a, i32 %b) {
; GISEL64-NEXT: s_mov_b64 exec, s[0:1]
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-DAGISEL-LABEL: unused_active:
+; GFX1250-DAGISEL: ; %bb.0:
+; GFX1250-DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-DAGISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 scope:SCOPE_SE ; 4-byte Folded Spill
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1
+; GFX1250-DAGISEL-NEXT: v_mov_b32_e32 v0, 14
+; GFX1250-DAGISEL-NEXT: s_xor_b32 exec_lo, s0, -1
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, s0
+; GFX1250-DAGISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_set_pc_i64 s[30:31]
ret i32 14
}
@@ -450,6 +511,44 @@ define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) {
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_wait_alu 0xf1ff
; GISEL64-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-DAGISEL-LABEL: csr:
+; GFX1250-DAGISEL: ; %bb.0:
+; GFX1250-DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-DAGISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
+; GFX1250-DAGISEL-NEXT: s_clause 0x3
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2, s32 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 offset:4 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:8 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49, s32 offset:16 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40, s32 offset:12 scope:SCOPE_SE ; 4-byte Folded Spill
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: ;;#ASMSTART
+; GFX1250-DAGISEL-NEXT: ; clobber CSR
+; GFX1250-DAGISEL-NEXT: ;;#ASMEND
+; GFX1250-DAGISEL-NEXT: v_writelane_b32 v2, s20, 0
+; GFX1250-DAGISEL-NEXT: ;;#ASMSTART
+; GFX1250-DAGISEL-NEXT: ; clobber non-CSR
+; GFX1250-DAGISEL-NEXT: ;;#ASMEND
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v40, off, s32 offset:12 ; 4-byte Folded Reload
+; GFX1250-DAGISEL-NEXT: v_dual_cndmask_b32 v0, 5, v0 :: v_dual_cndmask_b32 v1, 3, v1
+; GFX1250-DAGISEL-NEXT: v_readlane_b32 s20, v2, 0
+; GFX1250-DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250-DAGISEL-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
+; GFX1250-DAGISEL-NEXT: s_clause 0x3
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v2, off, s32
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v0, off, s32 offset:4
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:8
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v49, off, s32 offset:16
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
+; GFX1250-DAGISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_set_pc_i64 s[30:31]
%x = select i1 %active, i32 %a, i32 5
%y = select i1 %active, i32 %b, i32 3
call void asm sideeffect "; clobber CSR", "~{v40},~{s48}"()
@@ -531,6 +630,22 @@ define amdgpu_gfx_whole_wave void @csr_vgpr_only(i1 %active, i32 %a, i32 %b) {
; GISEL64-NEXT: s_mov_b64 exec, s[0:1]
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-DAGISEL-LABEL: csr_vgpr_only:
+; GFX1250-DAGISEL: ; %bb.0:
+; GFX1250-DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-DAGISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_or_saveexec_b32 s0, -1
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40, s32 scope:SCOPE_SE ; 4-byte Folded Spill
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: ;;#ASMSTART
+; GFX1250-DAGISEL-NEXT: ; clobber CSR VGPR
+; GFX1250-DAGISEL-NEXT: ;;#ASMEND
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, s0
+; GFX1250-DAGISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_set_pc_i64 s[30:31]
call void asm sideeffect "; clobber CSR VGPR", "~{v40}"()
ret void
}
@@ -627,6 +742,27 @@ define amdgpu_gfx_whole_wave void @sgpr_spill_only(i1 %active, i32 %a, i32 %b) {
; GISEL64-NEXT: s_mov_b64 exec, s[0:1]
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-DAGISEL-LABEL: sgpr_spill_only:
+; GFX1250-DAGISEL: ; %bb.0:
+; GFX1250-DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-DAGISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 scope:SCOPE_SE ; 4-byte Folded Spill
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1
+; GFX1250-DAGISEL-NEXT: v_writelane_b32 v0, s68, 0
+; GFX1250-DAGISEL-NEXT: ;;#ASMSTART
+; GFX1250-DAGISEL-NEXT: ; clobber CSR SGPR
+; GFX1250-DAGISEL-NEXT: ;;#ASMEND
+; GFX1250-DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-DAGISEL-NEXT: v_readlane_b32 s68, v0, 0
+; GFX1250-DAGISEL-NEXT: s_xor_b32 exec_lo, s0, -1
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, s0
+; GFX1250-DAGISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_set_pc_i64 s[30:31]
call void asm sideeffect "; clobber CSR SGPR", "~{s68}"()
ret void
}
@@ -751,6 +887,34 @@ define amdgpu_gfx_whole_wave i32 @multiple_blocks(i1 %active, i32 %a, i32 %b) {
; GISEL64-NEXT: s_mov_b64 exec, vcc
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-DAGISEL-LABEL: multiple_blocks:
+; GFX1250-DAGISEL: ; %bb.0:
+; GFX1250-DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-DAGISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
+; GFX1250-DAGISEL-NEXT: s_clause 0x1
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1
+; GFX1250-DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-DAGISEL-NEXT: s_mov_b32 s1, exec_lo
+; GFX1250-DAGISEL-NEXT: v_cmpx_eq_u32_e64 v0, v1
+; GFX1250-DAGISEL-NEXT: ; %bb.1: ; %if.then
+; GFX1250-DAGISEL-NEXT: v_add_nc_u32_e32 v1, v0, v1
+; GFX1250-DAGISEL-NEXT: ; %bb.2: ; %if.end
+; GFX1250-DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; GFX1250-DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-DAGISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1250-DAGISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
+; GFX1250-DAGISEL-NEXT: s_clause 0x1
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v0, off, s32
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
+; GFX1250-DAGISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_set_pc_i64 s[30:31]
%c = icmp eq i32 %a, %b
br i1 %c, label %if.then, label %if.end
@@ -888,6 +1052,34 @@ define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) {
; GISEL64-NEXT: s_mov_b64 exec, vcc
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-DAGISEL-LABEL: ret_64:
+; GFX1250-DAGISEL: ; %bb.0:
+; GFX1250-DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-DAGISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
+; GFX1250-DAGISEL-NEXT: s_clause 0x3
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1
+; GFX1250-DAGISEL-NEXT: v_dual_cndmask_b32 v1, 0, v1 :: v_dual_cndmask_b32 v0, 5, v0
+; GFX1250-DAGISEL-NEXT: v_dual_cndmask_b32 v2, 3, v2 :: v_dual_cndmask_b32 v3, 0, v3
+; GFX1250-DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250-DAGISEL-NEXT: v_mov_b32_dpp v0, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+; GFX1250-DAGISEL-NEXT: v_mov_b32_dpp v1, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+; GFX1250-DAGISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
+; GFX1250-DAGISEL-NEXT: s_clause 0x3
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v0, off, s32
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v2, off, s32 offset:8
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v3, off, s32 offset:12
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
+; GFX1250-DAGISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_set_pc_i64 s[30:31]
%x = select i1 %active, i64 %a, i64 5
%y = select i1 %active, i64 %b, i64 3
%ret = call i64 @llvm.amdgcn.update.dpp.i64(i64 %x, i64 %y, i32 1, i32 1, i32 1, i1 false)
@@ -1053,6 +1245,41 @@ define amdgpu_gfx_whole_wave void @inreg_args(i1 %active, i32 inreg %i32, <4 x i
; GISEL64-NEXT: s_mov_b64 exec, s[34:35]
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-DAGISEL-LABEL: inreg_args:
+; GFX1250-DAGISEL: ; %bb.0:
+; GFX1250-DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-DAGISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX1250-DAGISEL-NEXT: s_clause 0x5
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1
+; GFX1250-DAGISEL-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s9
+; GFX1250-DAGISEL-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6
+; GFX1250-DAGISEL-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4, s10 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: s_clause 0x1
+; GFX1250-DAGISEL-NEXT: scratch_store_b128 off, v[0:3], s11 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5, s11 scope:SCOPE_SE
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_xor_b32 exec_lo, s0, -1
+; GFX1250-DAGISEL-NEXT: s_clause 0x5
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v0, off, s32
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v2, off, s32 offset:8
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v3, off, s32 offset:12
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v4, off, s32 offset:16
+; GFX1250-DAGISEL-NEXT: scratch_load_b32 v5, off, s32 offset:20
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, s0
+; GFX1250-DAGISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_set_pc_i64 s[30:31]
store i32 %i32, ptr addrspace(5) %ptr
store <4 x i32> %v4i32, ptr addrspace(5) %ptr2
store float %float, ptr addrspace(5) %ptr2
@@ -2409,1276 +2636,1904 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_wait_alu 0xfffe
; GISEL64-NEXT: s_setpc_b64 s[30:31]
- %ret = call amdgpu_gfx <2 x half>(<2 x half>, <2 x half>) @gfx_callee(<2 x half> %y, <2 x half> %x) convergent
- ret <2 x half> %ret
-}
-
-define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %active, <2 x half> %x, <2 x half> %y) {
- ; This should not be turned into a tail call.
-; DAGISEL-LABEL: tail_call_gfx_from_whole_wave:
-; DAGISEL: ; %bb.0:
-; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; DAGISEL-NEXT: s_wait_expcnt 0x0
-; DAGISEL-NEXT: s_wait_samplecnt 0x0
-; DAGISEL-NEXT: s_wait_bvhcnt 0x0
-; DAGISEL-NEXT: s_wait_kmcnt 0x0
-; DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1
-; DAGISEL-NEXT: s_clause 0x1f
-; DAGISEL-NEXT: scratch_store_b32 off, v0, s32
-; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4
-; DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8
-; DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12
-; DAGISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16
-; DAGISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20
-; DAGISEL-NEXT: scratch_store_b32 off, v6, s32 offset:24
-; DAGISEL-NEXT: scratch_store_b32 off, v7, s32 offset:28
-; DAGISEL-NEXT: scratch_store_b32 off, v8, s32 offset:32
-; DAGISEL-NEXT: scratch_store_b32 off, v9, s32 offset:36
-; DAGISEL-NEXT: scratch_store_b32 off, v10, s32 offset:40
-; DAGISEL-NEXT: scratch_store_b32 off, v11, s32 offset:44
-; DAGISEL-NEXT: scratch_store_b32 off, v12, s32 offset:48
-; DAGISEL-NEXT: scratch_store_b32 off, v13, s32 offset:52
-; DAGISEL-NEXT: scratch_store_b32 off, v14, s32 offset:56
-; DAGISEL-NEXT: scratch_store_b32 off, v15, s32 offset:60
-; DAGISEL-NEXT: scratch_store_b32 off, v16, s32 offset:64
-; DAGISEL-NEXT: scratch_store_b32 off, v17, s32 offset:68
-; DAGISEL-NEXT: scratch_store_b32 off, v18, s32 offset:72
-; DAGISEL-NEXT: scratch_store_b32 off, v19, s32 offset:76
-; DAGISEL-NEXT: scratch_store_b32 off, v20, s32 offset:80
-; DAGISEL-NEXT: scratch_store_b32 off, v21, s32 offset:84
-; DAGISEL-NEXT: scratch_store_b32 off, v22, s32 offset:88
-; DAGISEL-NEXT: scratch_store_b32 off, v23, s32 offset:92
-; DAGISEL-NEXT: scratch_store_b32 off, v24, s32 offset:96
-; DAGISEL-NEXT: scratch_store_b32 off, v25, s32 offset:100
-; DAGISEL-NEXT: scratch_store_b32 off, v26, s32 offset:104
-; DAGISEL-NEXT: scratch_store_b32 off, v27, s32 offset:108
-; DAGISEL-NEXT: scratch_store_b32 off, v28, s32 offset:112
-; DAGISEL-NEXT: scratch_store_b32 off, v29, s32 offset:116
-; DAGISEL-NEXT: scratch_store_b32 off, v30, s32 offset:120
-; DAGISEL-NEXT: scratch_store_b32 off, v31, s32 offset:124
-; DAGISEL-NEXT: s_clause 0x1f
-; DAGISEL-NEXT: scratch_store_b32 off, v32, s32 offset:128
-; DAGISEL-NEXT: scratch_store_b32 off, v33, s32 offset:132
-; DAGISEL-NEXT: scratch_store_b32 off, v34, s32 offset:136
-; DAGISEL-NEXT: scratch_store_b32 off, v35, s32 offset:140
-; DAGISEL-NEXT: scratch_store_b32 off, v36, s32 offset:144
-; DAGISEL-NEXT: scratch_store_b32 off, v37, s32 offset:148
-; DAGISEL-NEXT: scratch_store_b32 off, v38, s32 offset:152
-; DAGISEL-NEXT: scratch_store_b32 off, v39, s32 offset:156
-; DAGISEL-NEXT: scratch_store_b32 off, v48, s32 offset:160
-; DAGISEL-NEXT: scratch_store_b32 off, v49, s32 offset:164
-; DAGISEL-NEXT: scratch_store_b32 off, v50, s32 offset:168
-; DAGISEL-NEXT: scratch_store_b32 off, v51, s32 offset:172
-; DAGISEL-NEXT: scratch_store_b32 off, v52, s32 offset:176
-; DAGISEL-NEXT: scratch_store_b32 off, v53, s32 offset:180
-; DAGISEL-NEXT: scratch_store_b32 off, v54, s32 offset:184
-; DAGISEL-NEXT: scratch_store_b32 off, v55, s32 offset:188
-; DAGISEL-NEXT: scratch_store_b32 off, v64, s32 offset:192
-; DAGISEL-NEXT: scratch_store_b32 off, v65, s32 offset:196
-; DAGISEL-NEXT: scratch_store_b32 off, v66, s32 offset:200
-; DAGISEL-NEXT: scratch_store_b32 off, v67, s32 offset:204
-; DAGISEL-NEXT: scratch_store_b32 off, v68, s32 offset:208
-; DAGISEL-NEXT: scratch_store_b32 off, ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/157166
More information about the llvm-commits mailing list