[llvm] 2593838 - [AMDGPU] Autogen checks for agpr-csr.ll (#132959)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 25 10:28:41 PDT 2025
Author: Jeffrey Byrnes
Date: 2025-03-25T10:28:35-07:00
New Revision: 25938389c023e7122a03cd4d3536187542046c65
URL: https://github.com/llvm/llvm-project/commit/25938389c023e7122a03cd4d3536187542046c65
DIFF: https://github.com/llvm/llvm-project/commit/25938389c023e7122a03cd4d3536187542046c65.diff
LOG: [AMDGPU] Autogen checks for agpr-csr.ll (#132959)
Needed for a RegisterCoalescer patch
Added:
Modified:
llvm/test/CodeGen/AMDGPU/agpr-csr.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-csr.ll b/llvm/test/CodeGen/AMDGPU/agpr-csr.ll
index 4f0e23b688087..e6e9ee7804190 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-csr.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-csr.ll
@@ -1,96 +1,236 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX908 %s
-; GCN-LABEL: {{^}}func_empty:
-; GCN-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GCN: s_setpc_b64
define void @func_empty() #0 {
+; GCN-LABEL: func_empty:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
ret void
}
-; GCN-LABEL: {{^}}func_areg_4:
-; GCN-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GCN: use agpr3
-; GCN-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GCN: s_setpc_b64
define void @func_areg_4() #0 {
+; GCN-LABEL: func_areg_4:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use agpr3
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; use agpr3", "~{a3}" ()
ret void
}
-; GCN-LABEL: {{^}}func_areg_32:
-; GCN-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GCN: use agpr31
-; GCN-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GCN: s_setpc_b64
define void @func_areg_32() #0 {
+; GCN-LABEL: func_areg_32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use agpr31
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; use agpr31", "~{a31}" ()
ret void
}
-; GCN-LABEL: {{^}}func_areg_33:
-; GCN-NOT: a32
-; GFX90A: v_accvgpr_read_b32 v0, a32 ; Reload Reuse
-; GCN-NOT: a32
-; GCN: use agpr32
-; GCN-NOT: a32
-; GFX90A: v_accvgpr_write_b32 a32, v0 ; Reload Reuse
-; GCN-NOT: a32
-; GCN: s_setpc_b64
define void @func_areg_33() #0 {
+; GFX90A-LABEL: func_areg_33:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_accvgpr_read_b32 v0, a32 ; Reload Reuse
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use agpr32
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_accvgpr_write_b32 a32, v0 ; Reload Reuse
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-LABEL: func_areg_33:
+; GFX908: ; %bb.0:
+; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-NEXT: ;;#ASMSTART
+; GFX908-NEXT: ; use agpr32
+; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; use agpr32", "~{a32}" ()
ret void
}
-; GCN-LABEL: {{^}}func_areg_64:
-; GFX908-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GFX90A: v_accvgpr_read_b32 v0, a63 ; Reload Reuse
-; GCN: use agpr63
-; GFX90A: v_accvgpr_write_b32 a63, v0 ; Reload Reuse
-; GCN-NOT: v_accvgpr
-; GCN: s_setpc_b64
+
define void @func_areg_64() #0 {
+; GFX90A-LABEL: func_areg_64:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_accvgpr_read_b32 v0, a63 ; Reload Reuse
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use agpr63
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_accvgpr_write_b32 a63, v0 ; Reload Reuse
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-LABEL: func_areg_64:
+; GFX908: ; %bb.0:
+; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-NEXT: ;;#ASMSTART
+; GFX908-NEXT: ; use agpr63
+; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; use agpr63", "~{a63}" ()
ret void
}
-; GCN-LABEL: {{^}}func_areg_31_63:
-; GFX908-NOT: buffer_
-; GFX908-NOT: v_accvgpr
-; GFX908-NOT: buffer
-; GFX90A: v_accvgpr_read_b32 v0, a63 ; Reload Reuse
-; GCN: use agpr31, agpr63
-; GFX90A: v_accvgpr_write_b32 a63, v0 ; Reload Reuse
-; GFX908-NOT: v_accvgpr
-; GFX908-NOT: buffer
-; GCN: s_setpc_b64
define void @func_areg_31_63() #0 {
+; GFX90A-LABEL: func_areg_31_63:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_accvgpr_read_b32 v0, a63 ; Reload Reuse
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use agpr31, agpr63
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_accvgpr_write_b32 a63, v0 ; Reload Reuse
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-LABEL: func_areg_31_63:
+; GFX908: ; %bb.0:
+; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-NEXT: ;;#ASMSTART
+; GFX908-NEXT: ; use agpr31, agpr63
+; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; use agpr31, agpr63", "~{a31},~{a63}" ()
ret void
}
declare void @func_unknown() #0
-; GCN-LABEL: {{^}}test_call_empty:
-; GCN-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GCN: def a[0:31]
-; GFX908-COUNT-8: v_accvgpr_read_b32
-; GFX90A-NOT: v_accvgpr
-; GCN-NOT: buffer_
-; GCN: s_swappc_b64
-; GCN-NOT: buffer_
-; GFX90A-NOT: v_accvgpr
-; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
-; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
-; GCN: s_endpgm
define amdgpu_kernel void @test_call_empty() #0 {
+; GFX90A-LABEL: test_call_empty:
+; GFX90A: ; %bb.0: ; %bb
+; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
+; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
+; GFX90A-NEXT: s_mov_b32 s22, -1
+; GFX90A-NEXT: s_mov_b32 s23, 0xe00000
+; GFX90A-NEXT: s_add_u32 s20, s20, s11
+; GFX90A-NEXT: s_addc_u32 s21, s21, 0
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_add_u32 s8, s4, 36
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_addc_u32 s9, s5, 0
+; GFX90A-NEXT: s_getpc_b64 s[4:5]
+; GFX90A-NEXT: s_add_u32 s4, s4, func_empty@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s5, s5, func_empty@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
+; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GFX90A-NEXT: v_mov_b32_e32 v31, v0
+; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23]
+; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a[0:31]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[28:31], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[24:27], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[20:23], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[16:19], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[12:15], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[8:11], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[4:7], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[0:3], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_endpgm
+;
+; GFX908-LABEL: test_call_empty:
+; GFX908: ; %bb.0: ; %bb
+; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
+; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
+; GFX908-NEXT: s_mov_b32 s22, -1
+; GFX908-NEXT: s_mov_b32 s23, 0xe00000
+; GFX908-NEXT: s_add_u32 s20, s20, s11
+; GFX908-NEXT: s_addc_u32 s21, s21, 0
+; GFX908-NEXT: s_mov_b32 s12, s8
+; GFX908-NEXT: s_add_u32 s8, s4, 36
+; GFX908-NEXT: s_mov_b32 s13, s9
+; GFX908-NEXT: s_addc_u32 s9, s5, 0
+; GFX908-NEXT: s_getpc_b64 s[4:5]
+; GFX908-NEXT: s_add_u32 s4, s4, func_empty@gotpcrel32@lo+4
+; GFX908-NEXT: s_addc_u32 s5, s5, func_empty@gotpcrel32@hi+12
+; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
+; GFX908-NEXT: s_mov_b32 s14, s10
+; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3]
+; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23]
+; GFX908-NEXT: s_mov_b32 s32, 0
+; GFX908-NEXT: ;;#ASMSTART
+; GFX908-NEXT: ; def a[0:31]
+; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: v_accvgpr_read_b32 v6, a3
+; GFX908-NEXT: v_accvgpr_read_b32 v5, a2
+; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
+; GFX908-NEXT: v_accvgpr_read_b32 v3, a0
+; GFX908-NEXT: v_accvgpr_read_b32 v10, a7
+; GFX908-NEXT: v_accvgpr_read_b32 v9, a6
+; GFX908-NEXT: v_accvgpr_read_b32 v8, a5
+; GFX908-NEXT: v_accvgpr_read_b32 v7, a4
+; GFX908-NEXT: v_accvgpr_read_b32 v14, a11
+; GFX908-NEXT: v_accvgpr_read_b32 v13, a10
+; GFX908-NEXT: v_accvgpr_read_b32 v12, a9
+; GFX908-NEXT: v_accvgpr_read_b32 v11, a8
+; GFX908-NEXT: v_accvgpr_read_b32 v18, a15
+; GFX908-NEXT: v_accvgpr_read_b32 v17, a14
+; GFX908-NEXT: v_accvgpr_read_b32 v16, a13
+; GFX908-NEXT: v_accvgpr_read_b32 v15, a12
+; GFX908-NEXT: v_accvgpr_read_b32 v22, a19
+; GFX908-NEXT: v_accvgpr_read_b32 v21, a18
+; GFX908-NEXT: v_accvgpr_read_b32 v20, a17
+; GFX908-NEXT: v_accvgpr_read_b32 v19, a16
+; GFX908-NEXT: v_accvgpr_read_b32 v26, a23
+; GFX908-NEXT: v_accvgpr_read_b32 v25, a22
+; GFX908-NEXT: v_accvgpr_read_b32 v24, a21
+; GFX908-NEXT: v_accvgpr_read_b32 v23, a20
+; GFX908-NEXT: v_accvgpr_read_b32 v30, a27
+; GFX908-NEXT: v_accvgpr_read_b32 v29, a26
+; GFX908-NEXT: v_accvgpr_read_b32 v28, a25
+; GFX908-NEXT: v_accvgpr_read_b32 v27, a24
+; GFX908-NEXT: v_accvgpr_read_b32 v35, a31
+; GFX908-NEXT: v_accvgpr_read_b32 v34, a30
+; GFX908-NEXT: v_accvgpr_read_b32 v33, a29
+; GFX908-NEXT: v_accvgpr_read_b32 v32, a28
+; GFX908-NEXT: s_waitcnt lgkmcnt(0)
+; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: s_endpgm
bb:
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
call void @func_empty()
@@ -98,21 +238,134 @@ bb:
ret void
}
-; GCN-LABEL: {{^}}test_call_areg4:
-; GCN-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GFX908: def a[0:31]
-; GFX90A: def a[4:35]
-; GFX908-COUNT-8: v_accvgpr_read_b32
-; GFX90A-NOT: v_accvgpr
-; GCN-NOT: buffer_
-; GCN: s_swappc_b64
-; GCN-NOT: buffer_
-; GFX90A-NOT: v_accvgpr
-; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
-; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
-; GCN: s_endpgm
define amdgpu_kernel void @test_call_areg4() #0 {
+; GFX90A-LABEL: test_call_areg4:
+; GFX90A: ; %bb.0: ; %bb
+; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
+; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
+; GFX90A-NEXT: s_mov_b32 s22, -1
+; GFX90A-NEXT: s_mov_b32 s23, 0xe00000
+; GFX90A-NEXT: s_add_u32 s20, s20, s11
+; GFX90A-NEXT: s_addc_u32 s21, s21, 0
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_add_u32 s8, s4, 36
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_addc_u32 s9, s5, 0
+; GFX90A-NEXT: s_getpc_b64 s[4:5]
+; GFX90A-NEXT: s_add_u32 s4, s4, func_areg_4@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s5, s5, func_areg_4@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
+; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GFX90A-NEXT: v_mov_b32_e32 v31, v0
+; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23]
+; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a[4:35]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[32:35], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[28:31], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[24:27], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[20:23], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[16:19], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[12:15], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[8:11], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[4:7], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_endpgm
+;
+; GFX908-LABEL: test_call_areg4:
+; GFX908: ; %bb.0: ; %bb
+; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
+; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
+; GFX908-NEXT: s_mov_b32 s22, -1
+; GFX908-NEXT: s_mov_b32 s23, 0xe00000
+; GFX908-NEXT: s_add_u32 s20, s20, s11
+; GFX908-NEXT: s_addc_u32 s21, s21, 0
+; GFX908-NEXT: s_mov_b32 s12, s8
+; GFX908-NEXT: s_add_u32 s8, s4, 36
+; GFX908-NEXT: s_mov_b32 s13, s9
+; GFX908-NEXT: s_addc_u32 s9, s5, 0
+; GFX908-NEXT: s_getpc_b64 s[4:5]
+; GFX908-NEXT: s_add_u32 s4, s4, func_areg_4@gotpcrel32@lo+4
+; GFX908-NEXT: s_addc_u32 s5, s5, func_areg_4@gotpcrel32@hi+12
+; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
+; GFX908-NEXT: s_mov_b32 s14, s10
+; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3]
+; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23]
+; GFX908-NEXT: s_mov_b32 s32, 0
+; GFX908-NEXT: ;;#ASMSTART
+; GFX908-NEXT: ; def a[0:31]
+; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: v_accvgpr_read_b32 v6, a3
+; GFX908-NEXT: v_accvgpr_read_b32 v5, a2
+; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
+; GFX908-NEXT: v_accvgpr_read_b32 v3, a0
+; GFX908-NEXT: v_accvgpr_read_b32 v10, a7
+; GFX908-NEXT: v_accvgpr_read_b32 v9, a6
+; GFX908-NEXT: v_accvgpr_read_b32 v8, a5
+; GFX908-NEXT: v_accvgpr_read_b32 v7, a4
+; GFX908-NEXT: v_accvgpr_read_b32 v14, a11
+; GFX908-NEXT: v_accvgpr_read_b32 v13, a10
+; GFX908-NEXT: v_accvgpr_read_b32 v12, a9
+; GFX908-NEXT: v_accvgpr_read_b32 v11, a8
+; GFX908-NEXT: v_accvgpr_read_b32 v18, a15
+; GFX908-NEXT: v_accvgpr_read_b32 v17, a14
+; GFX908-NEXT: v_accvgpr_read_b32 v16, a13
+; GFX908-NEXT: v_accvgpr_read_b32 v15, a12
+; GFX908-NEXT: v_accvgpr_read_b32 v22, a19
+; GFX908-NEXT: v_accvgpr_read_b32 v21, a18
+; GFX908-NEXT: v_accvgpr_read_b32 v20, a17
+; GFX908-NEXT: v_accvgpr_read_b32 v19, a16
+; GFX908-NEXT: v_accvgpr_read_b32 v26, a23
+; GFX908-NEXT: v_accvgpr_read_b32 v25, a22
+; GFX908-NEXT: v_accvgpr_read_b32 v24, a21
+; GFX908-NEXT: v_accvgpr_read_b32 v23, a20
+; GFX908-NEXT: v_accvgpr_read_b32 v30, a27
+; GFX908-NEXT: v_accvgpr_read_b32 v29, a26
+; GFX908-NEXT: v_accvgpr_read_b32 v28, a25
+; GFX908-NEXT: v_accvgpr_read_b32 v27, a24
+; GFX908-NEXT: v_accvgpr_read_b32 v35, a31
+; GFX908-NEXT: v_accvgpr_read_b32 v34, a30
+; GFX908-NEXT: v_accvgpr_read_b32 v33, a29
+; GFX908-NEXT: v_accvgpr_read_b32 v32, a28
+; GFX908-NEXT: s_waitcnt lgkmcnt(0)
+; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: s_endpgm
bb:
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
call void @func_areg_4()
@@ -120,21 +373,134 @@ bb:
ret void
}
-; GCN-LABEL: {{^}}test_call_areg32:
-; GCN-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GFX908: def a[0:31]
-; GFX90A: def a[32:63]
-; GFX908-COUNT-8: v_accvgpr_read_b32
-; GFX90A-NOT: v_accvgpr
-; GCN-NOT: buffer_
-; GCN: s_swappc_b64
-; GCN-NOT: buffer_
-; GFX90A-NOT: v_accvgpr
-; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
-; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
-; GCN: s_endpgm
define amdgpu_kernel void @test_call_areg32() #0 {
+; GFX90A-LABEL: test_call_areg32:
+; GFX90A: ; %bb.0: ; %bb
+; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
+; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
+; GFX90A-NEXT: s_mov_b32 s22, -1
+; GFX90A-NEXT: s_mov_b32 s23, 0xe00000
+; GFX90A-NEXT: s_add_u32 s20, s20, s11
+; GFX90A-NEXT: s_addc_u32 s21, s21, 0
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_add_u32 s8, s4, 36
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_addc_u32 s9, s5, 0
+; GFX90A-NEXT: s_getpc_b64 s[4:5]
+; GFX90A-NEXT: s_add_u32 s4, s4, func_areg_32@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s5, s5, func_areg_32@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
+; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GFX90A-NEXT: v_mov_b32_e32 v31, v0
+; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23]
+; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a[32:63]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[60:63], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[56:59], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[52:55], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[48:51], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[44:47], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[40:43], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[36:39], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[32:35], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_endpgm
+;
+; GFX908-LABEL: test_call_areg32:
+; GFX908: ; %bb.0: ; %bb
+; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
+; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
+; GFX908-NEXT: s_mov_b32 s22, -1
+; GFX908-NEXT: s_mov_b32 s23, 0xe00000
+; GFX908-NEXT: s_add_u32 s20, s20, s11
+; GFX908-NEXT: s_addc_u32 s21, s21, 0
+; GFX908-NEXT: s_mov_b32 s12, s8
+; GFX908-NEXT: s_add_u32 s8, s4, 36
+; GFX908-NEXT: s_mov_b32 s13, s9
+; GFX908-NEXT: s_addc_u32 s9, s5, 0
+; GFX908-NEXT: s_getpc_b64 s[4:5]
+; GFX908-NEXT: s_add_u32 s4, s4, func_areg_32@gotpcrel32@lo+4
+; GFX908-NEXT: s_addc_u32 s5, s5, func_areg_32@gotpcrel32@hi+12
+; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
+; GFX908-NEXT: s_mov_b32 s14, s10
+; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3]
+; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23]
+; GFX908-NEXT: s_mov_b32 s32, 0
+; GFX908-NEXT: ;;#ASMSTART
+; GFX908-NEXT: ; def a[0:31]
+; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: v_accvgpr_read_b32 v6, a3
+; GFX908-NEXT: v_accvgpr_read_b32 v5, a2
+; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
+; GFX908-NEXT: v_accvgpr_read_b32 v3, a0
+; GFX908-NEXT: v_accvgpr_read_b32 v10, a7
+; GFX908-NEXT: v_accvgpr_read_b32 v9, a6
+; GFX908-NEXT: v_accvgpr_read_b32 v8, a5
+; GFX908-NEXT: v_accvgpr_read_b32 v7, a4
+; GFX908-NEXT: v_accvgpr_read_b32 v14, a11
+; GFX908-NEXT: v_accvgpr_read_b32 v13, a10
+; GFX908-NEXT: v_accvgpr_read_b32 v12, a9
+; GFX908-NEXT: v_accvgpr_read_b32 v11, a8
+; GFX908-NEXT: v_accvgpr_read_b32 v18, a15
+; GFX908-NEXT: v_accvgpr_read_b32 v17, a14
+; GFX908-NEXT: v_accvgpr_read_b32 v16, a13
+; GFX908-NEXT: v_accvgpr_read_b32 v15, a12
+; GFX908-NEXT: v_accvgpr_read_b32 v22, a19
+; GFX908-NEXT: v_accvgpr_read_b32 v21, a18
+; GFX908-NEXT: v_accvgpr_read_b32 v20, a17
+; GFX908-NEXT: v_accvgpr_read_b32 v19, a16
+; GFX908-NEXT: v_accvgpr_read_b32 v26, a23
+; GFX908-NEXT: v_accvgpr_read_b32 v25, a22
+; GFX908-NEXT: v_accvgpr_read_b32 v24, a21
+; GFX908-NEXT: v_accvgpr_read_b32 v23, a20
+; GFX908-NEXT: v_accvgpr_read_b32 v30, a27
+; GFX908-NEXT: v_accvgpr_read_b32 v29, a26
+; GFX908-NEXT: v_accvgpr_read_b32 v28, a25
+; GFX908-NEXT: v_accvgpr_read_b32 v27, a24
+; GFX908-NEXT: v_accvgpr_read_b32 v35, a31
+; GFX908-NEXT: v_accvgpr_read_b32 v34, a30
+; GFX908-NEXT: v_accvgpr_read_b32 v33, a29
+; GFX908-NEXT: v_accvgpr_read_b32 v32, a28
+; GFX908-NEXT: s_waitcnt lgkmcnt(0)
+; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: s_endpgm
bb:
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
call void @func_areg_32()
@@ -142,20 +508,134 @@ bb:
ret void
}
-; GCN-LABEL: {{^}}test_call_areg64:
-; GCN-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GCN: def a[0:31]
-; GFX908-COUNT-8: v_accvgpr_read_b32
-; GFX90A-NOT: v_accvgpr
-; GCN-NOT: buffer_
-; GCN: s_swappc_b64
-; GCN-NOT: buffer_
-; GFX90A-NOT: v_accvgpr
-; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
-; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
-; GCN: s_endpgm
define amdgpu_kernel void @test_call_areg64() #0 {
+; GFX90A-LABEL: test_call_areg64:
+; GFX90A: ; %bb.0: ; %bb
+; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
+; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
+; GFX90A-NEXT: s_mov_b32 s22, -1
+; GFX90A-NEXT: s_mov_b32 s23, 0xe00000
+; GFX90A-NEXT: s_add_u32 s20, s20, s11
+; GFX90A-NEXT: s_addc_u32 s21, s21, 0
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_add_u32 s8, s4, 36
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_addc_u32 s9, s5, 0
+; GFX90A-NEXT: s_getpc_b64 s[4:5]
+; GFX90A-NEXT: s_add_u32 s4, s4, func_areg_64@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s5, s5, func_areg_64@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
+; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GFX90A-NEXT: v_mov_b32_e32 v31, v0
+; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23]
+; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a[0:31]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[28:31], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[24:27], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[20:23], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[16:19], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[12:15], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[8:11], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[4:7], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[0:3], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_endpgm
+;
+; GFX908-LABEL: test_call_areg64:
+; GFX908: ; %bb.0: ; %bb
+; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
+; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
+; GFX908-NEXT: s_mov_b32 s22, -1
+; GFX908-NEXT: s_mov_b32 s23, 0xe00000
+; GFX908-NEXT: s_add_u32 s20, s20, s11
+; GFX908-NEXT: s_addc_u32 s21, s21, 0
+; GFX908-NEXT: s_mov_b32 s12, s8
+; GFX908-NEXT: s_add_u32 s8, s4, 36
+; GFX908-NEXT: s_mov_b32 s13, s9
+; GFX908-NEXT: s_addc_u32 s9, s5, 0
+; GFX908-NEXT: s_getpc_b64 s[4:5]
+; GFX908-NEXT: s_add_u32 s4, s4, func_areg_64@gotpcrel32@lo+4
+; GFX908-NEXT: s_addc_u32 s5, s5, func_areg_64@gotpcrel32@hi+12
+; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
+; GFX908-NEXT: s_mov_b32 s14, s10
+; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3]
+; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23]
+; GFX908-NEXT: s_mov_b32 s32, 0
+; GFX908-NEXT: ;;#ASMSTART
+; GFX908-NEXT: ; def a[0:31]
+; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: v_accvgpr_read_b32 v6, a3
+; GFX908-NEXT: v_accvgpr_read_b32 v5, a2
+; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
+; GFX908-NEXT: v_accvgpr_read_b32 v3, a0
+; GFX908-NEXT: v_accvgpr_read_b32 v10, a7
+; GFX908-NEXT: v_accvgpr_read_b32 v9, a6
+; GFX908-NEXT: v_accvgpr_read_b32 v8, a5
+; GFX908-NEXT: v_accvgpr_read_b32 v7, a4
+; GFX908-NEXT: v_accvgpr_read_b32 v14, a11
+; GFX908-NEXT: v_accvgpr_read_b32 v13, a10
+; GFX908-NEXT: v_accvgpr_read_b32 v12, a9
+; GFX908-NEXT: v_accvgpr_read_b32 v11, a8
+; GFX908-NEXT: v_accvgpr_read_b32 v18, a15
+; GFX908-NEXT: v_accvgpr_read_b32 v17, a14
+; GFX908-NEXT: v_accvgpr_read_b32 v16, a13
+; GFX908-NEXT: v_accvgpr_read_b32 v15, a12
+; GFX908-NEXT: v_accvgpr_read_b32 v22, a19
+; GFX908-NEXT: v_accvgpr_read_b32 v21, a18
+; GFX908-NEXT: v_accvgpr_read_b32 v20, a17
+; GFX908-NEXT: v_accvgpr_read_b32 v19, a16
+; GFX908-NEXT: v_accvgpr_read_b32 v26, a23
+; GFX908-NEXT: v_accvgpr_read_b32 v25, a22
+; GFX908-NEXT: v_accvgpr_read_b32 v24, a21
+; GFX908-NEXT: v_accvgpr_read_b32 v23, a20
+; GFX908-NEXT: v_accvgpr_read_b32 v30, a27
+; GFX908-NEXT: v_accvgpr_read_b32 v29, a26
+; GFX908-NEXT: v_accvgpr_read_b32 v28, a25
+; GFX908-NEXT: v_accvgpr_read_b32 v27, a24
+; GFX908-NEXT: v_accvgpr_read_b32 v35, a31
+; GFX908-NEXT: v_accvgpr_read_b32 v34, a30
+; GFX908-NEXT: v_accvgpr_read_b32 v33, a29
+; GFX908-NEXT: v_accvgpr_read_b32 v32, a28
+; GFX908-NEXT: s_waitcnt lgkmcnt(0)
+; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: s_endpgm
bb:
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
call void @func_areg_64()
@@ -163,21 +643,134 @@ bb:
ret void
}
-; GCN-LABEL: {{^}}test_call_areg31_63:
-; GCN-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GFX908: def a[0:31]
-; GFX90A: def a[32:63]
-; GFX908-COUNT-8: v_accvgpr_read_b32
-; GFX90A-NOT: v_accvgpr
-; GCN-NOT: buffer_
-; GCN: s_swappc_b64
-; GCN-NOT: buffer_
-; GFX90A-NOT: v_accvgpr
-; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
-; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
-; GCN: s_endpgm
define amdgpu_kernel void @test_call_areg31_63() #0 {
+; GFX90A-LABEL: test_call_areg31_63:
+; GFX90A: ; %bb.0: ; %bb
+; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
+; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
+; GFX90A-NEXT: s_mov_b32 s22, -1
+; GFX90A-NEXT: s_mov_b32 s23, 0xe00000
+; GFX90A-NEXT: s_add_u32 s20, s20, s11
+; GFX90A-NEXT: s_addc_u32 s21, s21, 0
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_add_u32 s8, s4, 36
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_addc_u32 s9, s5, 0
+; GFX90A-NEXT: s_getpc_b64 s[4:5]
+; GFX90A-NEXT: s_add_u32 s4, s4, func_areg_31_63@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s5, s5, func_areg_31_63@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
+; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GFX90A-NEXT: v_mov_b32_e32 v31, v0
+; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23]
+; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a[32:63]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[60:63], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[56:59], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[52:55], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[48:51], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[44:47], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[40:43], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[36:39], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[32:35], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_endpgm
+;
+; GFX908-LABEL: test_call_areg31_63:
+; GFX908: ; %bb.0: ; %bb
+; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
+; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
+; GFX908-NEXT: s_mov_b32 s22, -1
+; GFX908-NEXT: s_mov_b32 s23, 0xe00000
+; GFX908-NEXT: s_add_u32 s20, s20, s11
+; GFX908-NEXT: s_addc_u32 s21, s21, 0
+; GFX908-NEXT: s_mov_b32 s12, s8
+; GFX908-NEXT: s_add_u32 s8, s4, 36
+; GFX908-NEXT: s_mov_b32 s13, s9
+; GFX908-NEXT: s_addc_u32 s9, s5, 0
+; GFX908-NEXT: s_getpc_b64 s[4:5]
+; GFX908-NEXT: s_add_u32 s4, s4, func_areg_31_63@gotpcrel32@lo+4
+; GFX908-NEXT: s_addc_u32 s5, s5, func_areg_31_63@gotpcrel32@hi+12
+; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
+; GFX908-NEXT: s_mov_b32 s14, s10
+; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3]
+; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23]
+; GFX908-NEXT: s_mov_b32 s32, 0
+; GFX908-NEXT: ;;#ASMSTART
+; GFX908-NEXT: ; def a[0:31]
+; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: v_accvgpr_read_b32 v6, a3
+; GFX908-NEXT: v_accvgpr_read_b32 v5, a2
+; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
+; GFX908-NEXT: v_accvgpr_read_b32 v3, a0
+; GFX908-NEXT: v_accvgpr_read_b32 v10, a7
+; GFX908-NEXT: v_accvgpr_read_b32 v9, a6
+; GFX908-NEXT: v_accvgpr_read_b32 v8, a5
+; GFX908-NEXT: v_accvgpr_read_b32 v7, a4
+; GFX908-NEXT: v_accvgpr_read_b32 v14, a11
+; GFX908-NEXT: v_accvgpr_read_b32 v13, a10
+; GFX908-NEXT: v_accvgpr_read_b32 v12, a9
+; GFX908-NEXT: v_accvgpr_read_b32 v11, a8
+; GFX908-NEXT: v_accvgpr_read_b32 v18, a15
+; GFX908-NEXT: v_accvgpr_read_b32 v17, a14
+; GFX908-NEXT: v_accvgpr_read_b32 v16, a13
+; GFX908-NEXT: v_accvgpr_read_b32 v15, a12
+; GFX908-NEXT: v_accvgpr_read_b32 v22, a19
+; GFX908-NEXT: v_accvgpr_read_b32 v21, a18
+; GFX908-NEXT: v_accvgpr_read_b32 v20, a17
+; GFX908-NEXT: v_accvgpr_read_b32 v19, a16
+; GFX908-NEXT: v_accvgpr_read_b32 v26, a23
+; GFX908-NEXT: v_accvgpr_read_b32 v25, a22
+; GFX908-NEXT: v_accvgpr_read_b32 v24, a21
+; GFX908-NEXT: v_accvgpr_read_b32 v23, a20
+; GFX908-NEXT: v_accvgpr_read_b32 v30, a27
+; GFX908-NEXT: v_accvgpr_read_b32 v29, a26
+; GFX908-NEXT: v_accvgpr_read_b32 v28, a25
+; GFX908-NEXT: v_accvgpr_read_b32 v27, a24
+; GFX908-NEXT: v_accvgpr_read_b32 v35, a31
+; GFX908-NEXT: v_accvgpr_read_b32 v34, a30
+; GFX908-NEXT: v_accvgpr_read_b32 v33, a29
+; GFX908-NEXT: v_accvgpr_read_b32 v32, a28
+; GFX908-NEXT: s_waitcnt lgkmcnt(0)
+; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: s_endpgm
bb:
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
call void @func_areg_31_63()
@@ -185,21 +778,134 @@ bb:
ret void
}
-; GCN-LABEL: {{^}}test_call_unknown:
-; GCN-NOT: buffer_
-; GCN-NOT: v_accvgpr
-; GFX908: def a[0:31]
-; GFX90A: def a[32:63]
-; GFX908-COUNT-8: v_accvgpr_read_b32
-; GFX90A-NOT: v_accvgpr
-; GCN-NOT: buffer_
-; GCN: s_swappc_b64
-; GCN-NOT: buffer_
-; GFX90A-NOT: v_accvgpr
-; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
-; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
-; GCN: s_endpgm
define amdgpu_kernel void @test_call_unknown() #0 {
+; GFX90A-LABEL: test_call_unknown:
+; GFX90A: ; %bb.0: ; %bb
+; GFX90A-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX90A-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX90A-NEXT: s_mov_b32 s38, -1
+; GFX90A-NEXT: s_mov_b32 s39, 0xe00000
+; GFX90A-NEXT: s_add_u32 s36, s36, s11
+; GFX90A-NEXT: s_addc_u32 s37, s37, 0
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_add_u32 s8, s4, 36
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_addc_u32 s9, s5, 0
+; GFX90A-NEXT: s_getpc_b64 s[4:5]
+; GFX90A-NEXT: s_add_u32 s4, s4, func_unknown@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s5, s5, func_unknown@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
+; GFX90A-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX90A-NEXT: v_mov_b32_e32 v31, v0
+; GFX90A-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def a[32:63]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[60:63], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[56:59], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[52:55], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[48:51], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[44:47], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[40:43], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[36:39], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[32:35], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_endpgm
+;
+; GFX908-LABEL: test_call_unknown:
+; GFX908: ; %bb.0: ; %bb
+; GFX908-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX908-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX908-NEXT: s_mov_b32 s38, -1
+; GFX908-NEXT: s_mov_b32 s39, 0xe00000
+; GFX908-NEXT: s_add_u32 s36, s36, s11
+; GFX908-NEXT: s_addc_u32 s37, s37, 0
+; GFX908-NEXT: s_mov_b32 s12, s8
+; GFX908-NEXT: s_add_u32 s8, s4, 36
+; GFX908-NEXT: s_mov_b32 s13, s9
+; GFX908-NEXT: s_addc_u32 s9, s5, 0
+; GFX908-NEXT: s_getpc_b64 s[4:5]
+; GFX908-NEXT: s_add_u32 s4, s4, func_unknown@gotpcrel32@lo+4
+; GFX908-NEXT: s_addc_u32 s5, s5, func_unknown@gotpcrel32@hi+12
+; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
+; GFX908-NEXT: s_mov_b32 s14, s10
+; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3]
+; GFX908-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX908-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX908-NEXT: s_mov_b32 s32, 0
+; GFX908-NEXT: ;;#ASMSTART
+; GFX908-NEXT: ; def a[0:31]
+; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: v_accvgpr_read_b32 v43, a3
+; GFX908-NEXT: v_accvgpr_read_b32 v42, a2
+; GFX908-NEXT: v_accvgpr_read_b32 v41, a1
+; GFX908-NEXT: v_accvgpr_read_b32 v40, a0
+; GFX908-NEXT: v_accvgpr_read_b32 v47, a7
+; GFX908-NEXT: v_accvgpr_read_b32 v46, a6
+; GFX908-NEXT: v_accvgpr_read_b32 v45, a5
+; GFX908-NEXT: v_accvgpr_read_b32 v44, a4
+; GFX908-NEXT: v_accvgpr_read_b32 v59, a11
+; GFX908-NEXT: v_accvgpr_read_b32 v58, a10
+; GFX908-NEXT: v_accvgpr_read_b32 v57, a9
+; GFX908-NEXT: v_accvgpr_read_b32 v56, a8
+; GFX908-NEXT: v_accvgpr_read_b32 v63, a15
+; GFX908-NEXT: v_accvgpr_read_b32 v62, a14
+; GFX908-NEXT: v_accvgpr_read_b32 v61, a13
+; GFX908-NEXT: v_accvgpr_read_b32 v60, a12
+; GFX908-NEXT: v_accvgpr_read_b32 v75, a19
+; GFX908-NEXT: v_accvgpr_read_b32 v74, a18
+; GFX908-NEXT: v_accvgpr_read_b32 v73, a17
+; GFX908-NEXT: v_accvgpr_read_b32 v72, a16
+; GFX908-NEXT: v_accvgpr_read_b32 v79, a23
+; GFX908-NEXT: v_accvgpr_read_b32 v78, a22
+; GFX908-NEXT: v_accvgpr_read_b32 v77, a21
+; GFX908-NEXT: v_accvgpr_read_b32 v76, a20
+; GFX908-NEXT: v_accvgpr_read_b32 v91, a27
+; GFX908-NEXT: v_accvgpr_read_b32 v90, a26
+; GFX908-NEXT: v_accvgpr_read_b32 v89, a25
+; GFX908-NEXT: v_accvgpr_read_b32 v88, a24
+; GFX908-NEXT: v_accvgpr_read_b32 v95, a31
+; GFX908-NEXT: v_accvgpr_read_b32 v94, a30
+; GFX908-NEXT: v_accvgpr_read_b32 v93, a29
+; GFX908-NEXT: v_accvgpr_read_b32 v92, a28
+; GFX908-NEXT: s_waitcnt lgkmcnt(0)
+; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[92:95], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[88:91], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[76:79], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[72:75], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[60:63], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[56:59], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[44:47], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: global_store_dwordx4 v[0:1], v[40:43], off
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: s_endpgm
bb:
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
call void @func_unknown()
More information about the llvm-commits
mailing list