[llvm] [AMDGPU] Autogen checks for agpr-csr.ll (PR #132959)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 25 10:20:46 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jeffrey Byrnes (jrbyrnes)
<details>
<summary>Changes</summary>
Needed for a RegisterCoalescer patch
---
Patch is 42.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132959.diff
1 Files Affected:
- (modified) llvm/test/CodeGen/AMDGPU/agpr-csr.ll (+833-127)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-csr.ll b/llvm/test/CodeGen/AMDGPU/agpr-csr.ll
index 4f0e23b688087..e6e9ee7804190 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-csr.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-csr.ll
@@ -1,96 +1,236 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX908 %s
-; GCN-LABEL: {{^}}func_empty:
-; GCN-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GCN: s_setpc_b64
define void @func_empty() #0 {
+; GCN-LABEL: func_empty:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
ret void
}
-; GCN-LABEL: {{^}}func_areg_4:
-; GCN-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GCN: use agpr3
-; GCN-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GCN: s_setpc_b64
define void @func_areg_4() #0 {
+; GCN-LABEL: func_areg_4:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use agpr3
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; use agpr3", "~{a3}" ()
ret void
}
-; GCN-LABEL: {{^}}func_areg_32:
-; GCN-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GCN: use agpr31
-; GCN-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GCN: s_setpc_b64
define void @func_areg_32() #0 {
+; GCN-LABEL: func_areg_32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use agpr31
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; use agpr31", "~{a31}" ()
ret void
}
-; GCN-LABEL: {{^}}func_areg_33:
-; GCN-NOT: a32
-; GFX90A: v_accvgpr_read_b32 v0, a32 ; Reload Reuse
-; GCN-NOT: a32
-; GCN: use agpr32
-; GCN-NOT: a32
-; GFX90A: v_accvgpr_write_b32 a32, v0 ; Reload Reuse
-; GCN-NOT: a32
-; GCN: s_setpc_b64
define void @func_areg_33() #0 {
+; GFX90A-LABEL: func_areg_33:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_accvgpr_read_b32 v0, a32 ; Reload Reuse
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use agpr32
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_accvgpr_write_b32 a32, v0 ; Reload Reuse
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-LABEL: func_areg_33:
+; GFX908: ; %bb.0:
+; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-NEXT: ;;#ASMSTART
+; GFX908-NEXT: ; use agpr32
+; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; use agpr32", "~{a32}" ()
ret void
}
-; GCN-LABEL: {{^}}func_areg_64:
-; GFX908-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GFX90A: v_accvgpr_read_b32 v0, a63 ; Reload Reuse
-; GCN: use agpr63
-; GFX90A: v_accvgpr_write_b32 a63, v0 ; Reload Reuse
-; GCN-NOT: v_accvgpr
-; GCN: s_setpc_b64
+
define void @func_areg_64() #0 {
+; GFX90A-LABEL: func_areg_64:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_accvgpr_read_b32 v0, a63 ; Reload Reuse
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use agpr63
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_accvgpr_write_b32 a63, v0 ; Reload Reuse
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-LABEL: func_areg_64:
+; GFX908: ; %bb.0:
+; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-NEXT: ;;#ASMSTART
+; GFX908-NEXT: ; use agpr63
+; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; use agpr63", "~{a63}" ()
ret void
}
-; GCN-LABEL: {{^}}func_areg_31_63:
-; GFX908-NOT: buffer_
-; GFX908-NOT: v_accvgpr
-; GFX908-NOT: buffer
-; GFX90A: v_accvgpr_read_b32 v0, a63 ; Reload Reuse
-; GCN: use agpr31, agpr63
-; GFX90A: v_accvgpr_write_b32 a63, v0 ; Reload Reuse
-; GFX908-NOT: v_accvgpr
-; GFX908-NOT: buffer
-; GCN: s_setpc_b64
define void @func_areg_31_63() #0 {
+; GFX90A-LABEL: func_areg_31_63:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_accvgpr_read_b32 v0, a63 ; Reload Reuse
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use agpr31, agpr63
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_accvgpr_write_b32 a63, v0 ; Reload Reuse
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-LABEL: func_areg_31_63:
+; GFX908: ; %bb.0:
+; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-NEXT: ;;#ASMSTART
+; GFX908-NEXT: ; use agpr31, agpr63
+; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; use agpr31, agpr63", "~{a31},~{a63}" ()
ret void
}
declare void @func_unknown() #0
-; GCN-LABEL: {{^}}test_call_empty:
-; GCN-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GCN: def a[0:31]
-; GFX908-COUNT-8: v_accvgpr_read_b32
-; GFX90A-NOT: v_accvgpr
-; GCN-NOT: buffer_
-; GCN: s_swappc_b64
-; GCN-NOT: buffer_
-; GFX90A-NOT: v_accvgpr
-; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
-; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
-; GCN: s_endpgm
define amdgpu_kernel void @test_call_empty() #0 {
+; GFX90A-LABEL: test_call_empty:
+; GFX90A: ; %bb.0: ; %bb
+; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
+; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
+; GFX90A-NEXT: s_mov_b32 s22, -1
+; GFX90A-NEXT: s_mov_b32 s23, 0xe00000
+; GFX90A-NEXT: s_add_u32 s20, s20, s11
+; GFX90A-NEXT: s_addc_u32 s21, s21, 0
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_add_u32 s8, s4, 36
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_addc_u32 s9, s5, 0
+; GFX90A-NEXT: s_getpc_b64 s[4:5]
+; GFX90A-NEXT: s_add_u32 s4, s4, func_empty@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s5, s5, func_empty@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
+; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GFX90A-NEXT: v_mov_b32_e32 v31, v0
+; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23]
+; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a[0:31]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[28:31], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[24:27], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[20:23], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[16:19], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[12:15], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[8:11], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[4:7], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[0:3], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_endpgm
+;
+; GFX908-LABEL: test_call_empty:
+; GFX908: ; %bb.0: ; %bb
+; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
+; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
+; GFX908-NEXT: s_mov_b32 s22, -1
+; GFX908-NEXT: s_mov_b32 s23, 0xe00000
+; GFX908-NEXT: s_add_u32 s20, s20, s11
+; GFX908-NEXT: s_addc_u32 s21, s21, 0
+; GFX908-NEXT: s_mov_b32 s12, s8
+; GFX908-NEXT: s_add_u32 s8, s4, 36
+; GFX908-NEXT: s_mov_b32 s13, s9
+; GFX908-NEXT: s_addc_u32 s9, s5, 0
+; GFX908-NEXT: s_getpc_b64 s[4:5]
+; GFX908-NEXT: s_add_u32 s4, s4, func_empty@gotpcrel32@lo+4
+; GFX908-NEXT: s_addc_u32 s5, s5, func_empty@gotpcrel32@hi+12
+; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
+; GFX908-NEXT: s_mov_b32 s14, s10
+; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3]
+; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23]
+; GFX908-NEXT: s_mov_b32 s32, 0
+; GFX908-NEXT: ;;#ASMSTART
+; GFX908-NEXT: ; def a[0:31]
+; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: v_accvgpr_read_b32 v6, a3
+; GFX908-NEXT: v_accvgpr_read_b32 v5, a2
+; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
+; GFX908-NEXT: v_accvgpr_read_b32 v3, a0
+; GFX908-NEXT: v_accvgpr_read_b32 v10, a7
+; GFX908-NEXT: v_accvgpr_read_b32 v9, a6
+; GFX908-NEXT: v_accvgpr_read_b32 v8, a5
+; GFX908-NEXT: v_accvgpr_read_b32 v7, a4
+; GFX908-NEXT: v_accvgpr_read_b32 v14, a11
+; GFX908-NEXT: v_accvgpr_read_b32 v13, a10
+; GFX908-NEXT: v_accvgpr_read_b32 v12, a9
+; GFX908-NEXT: v_accvgpr_read_b32 v11, a8
+; GFX908-NEXT: v_accvgpr_read_b32 v18, a15
+; GFX908-NEXT: v_accvgpr_read_b32 v17, a14
+; GFX908-NEXT: v_accvgpr_read_b32 v16, a13
+; GFX908-NEXT: v_accvgpr_read_b32 v15, a12
+; GFX908-NEXT: v_accvgpr_read_b32 v22, a19
+; GFX908-NEXT: v_accvgpr_read_b32 v21, a18
+; GFX908-NEXT: v_accvgpr_read_b32 v20, a17
+; GFX908-NEXT: v_accvgpr_read_b32 v19, a16
+; GFX908-NEXT: v_accvgpr_read_b32 v26, a23
+; GFX908-NEXT: v_accvgpr_read_b32 v25, a22
+; GFX908-NEXT: v_accvgpr_read_b32 v24, a21
+; GFX908-NEXT: v_accvgpr_read_b32 v23, a20
+; GFX908-NEXT: v_accvgpr_read_b32 v30, a27
+; GFX908-NEXT: v_accvgpr_read_b32 v29, a26
+; GFX908-NEXT: v_accvgpr_read_b32 v28, a25
+; GFX908-NEXT: v_accvgpr_read_b32 v27, a24
+; GFX908-NEXT: v_accvgpr_read_b32 v35, a31
+; GFX908-NEXT: v_accvgpr_read_b32 v34, a30
+; GFX908-NEXT: v_accvgpr_read_b32 v33, a29
+; GFX908-NEXT: v_accvgpr_read_b32 v32, a28
+; GFX908-NEXT: s_waitcnt lgkmcnt(0)
+; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: s_endpgm
bb:
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
call void @func_empty()
@@ -98,21 +238,134 @@ bb:
ret void
}
-; GCN-LABEL: {{^}}test_call_areg4:
-; GCN-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GFX908: def a[0:31]
-; GFX90A: def a[4:35]
-; GFX908-COUNT-8: v_accvgpr_read_b32
-; GFX90A-NOT: v_accvgpr
-; GCN-NOT: buffer_
-; GCN: s_swappc_b64
-; GCN-NOT: buffer_
-; GFX90A-NOT: v_accvgpr
-; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
-; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
-; GCN: s_endpgm
define amdgpu_kernel void @test_call_areg4() #0 {
+; GFX90A-LABEL: test_call_areg4:
+; GFX90A: ; %bb.0: ; %bb
+; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
+; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
+; GFX90A-NEXT: s_mov_b32 s22, -1
+; GFX90A-NEXT: s_mov_b32 s23, 0xe00000
+; GFX90A-NEXT: s_add_u32 s20, s20, s11
+; GFX90A-NEXT: s_addc_u32 s21, s21, 0
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_add_u32 s8, s4, 36
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_addc_u32 s9, s5, 0
+; GFX90A-NEXT: s_getpc_b64 s[4:5]
+; GFX90A-NEXT: s_add_u32 s4, s4, func_areg_4@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s5, s5, func_areg_4@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
+; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GFX90A-NEXT: v_mov_b32_e32 v31, v0
+; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23]
+; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a[4:35]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[32:35], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[28:31], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[24:27], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[20:23], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[16:19], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[12:15], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[8:11], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[4:7], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_endpgm
+;
+; GFX908-LABEL: test_call_areg4:
+; GFX908: ; %bb.0: ; %bb
+; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
+; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
+; GFX908-NEXT: s_mov_b32 s22, -1
+; GFX908-NEXT: s_mov_b32 s23, 0xe00000
+; GFX908-NEXT: s_add_u32 s20, s20, s11
+; GFX908-NEXT: s_addc_u32 s21, s21, 0
+; GFX908-NEXT: s_mov_b32 s12, s8
+; GFX908-NEXT: s_add_u32 s8, s4, 36
+; GFX908-NEXT: s_mov_b32 s13, s9
+; GFX908-NEXT: s_addc_u32 s9, s5, 0
+; GFX908-NEXT: s_getpc_b64 s[4:5]
+; GFX908-NEXT: s_add_u32 s4, s4, func_areg_4@gotpcrel32@lo+4
+; GFX908-NEXT: s_addc_u32 s5, s5, func_areg_4@gotpcrel32@hi+12
+; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
+; GFX908-NEXT: s_mov_b32 s14, s10
+; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3]
+; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23]
+; GFX908-NEXT: s_mov_b32 s32, 0
+; GFX908-NEXT: ;;#ASMSTART
+; GFX908-NEXT: ; def a[0:31]
+; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: v_accvgpr_read_b32 v6, a3
+; GFX908-NEXT: v_accvgpr_read_b32 v5, a2
+; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
+; GFX908-NEXT: v_accvgpr_read_b32 v3, a0
+; GFX908-NEXT: v_accvgpr_read_b32 v10, a7
+; GFX908-NEXT: v_accvgpr_read_b32 v9, a6
+; GFX908-NEXT: v_accvgpr_read_b32 v8, a5
+; GFX908-NEXT: v_accvgpr_read_b32 v7, a4
+; GFX908-NEXT: v_accvgpr_read_b32 v14, a11
+; GFX908-NEXT: v_accvgpr_read_b32 v13, a10
+; GFX908-NEXT: v_accvgpr_read_b32 v12, a9
+; GFX908-NEXT: v_accvgpr_read_b32 v11, a8
+; GFX908-NEXT: v_accvgpr_read_b32 v18, a15
+; GFX908-NEXT: v_accvgpr_read_b32 v17, a14
+; GFX908-NEXT: v_accvgpr_read_b32 v16, a13
+; GFX908-NEXT: v_accvgpr_read_b32 v15, a12
+; GFX908-NEXT: v_accvgpr_read_b32 v22, a19
+; GFX908-NEXT: v_accvgpr_read_b32 v21, a18
+; GFX908-NEXT: v_accvgpr_read_b32 v20, a17
+; GFX908-NEXT: v_accvgpr_read_b32 v19, a16
+; GFX908-NEXT: v_accvgpr_read_b32 v26, a23
+; GFX908-NEXT: v_accvgpr_read_b32 v25, a22
+; GFX908-NEXT: v_accvgpr_read_b32 v24, a21
+; GFX908-NEXT: v_accvgpr_read_b32 v23, a20
+; GFX908-NEXT: v_accvgpr_read_b32 v30, a27
+; GFX908-NEXT: v_accvgpr_read_b32 v29, a26
+; GFX908-NEXT: v_accvgpr_read_b32 v28, a25
+; GFX908-NEXT: v_accvgpr_read_b32 v27, a24
+; GFX908-NEXT: v_accvgpr_read_b32 v35, a31
+; GFX908-NEXT: v_accvgpr_read_b32 v34, a30
+; GFX908-NEXT: v_accvgpr_read_b32 v33, a29
+; GFX908-NEXT: v_accvgpr_read_b32 v32, a28
+; GFX908-NEXT: s_waitcnt lgkmcnt(0)
+; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: s_endpgm
bb:
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
call void @func_areg_4()
@@ -120,21 +373,134 @@ bb:
ret void
}
-; GCN-LABEL: {{^}}test_call_areg32:
-; GCN-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GFX908: def a[0:31]
-; GFX90A: def a[32:63]
-; GFX908-COUNT-8: v_accvgpr_read_b32
-; GFX90A-NOT: v_accvgpr
-; GCN-NOT: buffer_
-; GCN: s_swappc_b64
-; GCN-NOT: buffer_
-; GFX90A-NOT: v_accvgpr
-; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
-; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
-; GCN: s_endpgm
define amdgpu_kernel void @test_call_areg32() #0 {
+; GFX90A-LABEL: test_call_areg32:
+; GFX90A: ; %bb.0: ; %bb
+; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
+; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
+; GFX90A-NEXT: s_mov_b32 s22, -1
+; GFX90A-NEXT: s_mov_b32 s23, 0xe00000
+; GFX90A-NEXT: s_add_u32 s20, s20, s11
+; GFX90A-NEXT: s_addc_u32 s21, s21, 0
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_add_u32 s8, s4, 36
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_addc_u32 s9, s5, 0
+; GFX90A-NEXT: s_getpc_b64 s[4:5]
+; GFX90A-NEXT: s_add_u32 s4, s4, func_areg_32@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s5, s5, func_areg_32@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
+; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GFX90A-NEXT: v_mov_b32_e32 v31, v0
+; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23]
+; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a[32:63]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[60:63], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[56:59], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[52:55], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[48:51], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[44:47], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[40:43], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[36:39], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[32:35], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_endpgm
+;
+; GFX908-LABEL: test_call_areg32:
+; GFX908: ; %bb.0:...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/132959
More information about the llvm-commits
mailing list