[llvm] 6e47bff - [AMDGPU] callee-special-input-vgprs.ll / callee-special-input-vgprs-packed.ll - regenerate test coverage (#159587)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 18 08:19:52 PDT 2025
Author: Simon Pilgrim
Date: 2025-09-18T15:19:48Z
New Revision: 6e47bff24d83ea4db74cf548146baf6170aeb9f0
URL: https://github.com/llvm/llvm-project/commit/6e47bff24d83ea4db74cf548146baf6170aeb9f0
DIFF: https://github.com/llvm/llvm-project/commit/6e47bff24d83ea4db74cf548146baf6170aeb9f0.diff
LOG: [AMDGPU] callee-special-input-vgprs.ll / callee-special-input-vgprs-packed.ll - regenerate test coverage (#159587)
Added:
Modified:
llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll
llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll
index d05424ffe773d..fccee3da6d77e 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll
@@ -1,53 +1,94 @@
-; RUN: opt -passes=amdgpu-attributor -mcpu=kaveri < %s | llc -mcpu=gfx90a -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,GFX7,UNPACKED-TID %s
-; RUN: opt -passes=amdgpu-attributor -mcpu=gfx90a -mattr=-xnack < %s | llc -mcpu=gfx90a -mattr=-xnack -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A,PACKED-TID %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=amdgpu-attributor -mcpu=kaveri < %s | llc -mcpu=gfx90a -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,GFX7 %s
+; RUN: opt -passes=amdgpu-attributor -mcpu=gfx90a -mattr=-xnack < %s | llc -mcpu=gfx90a -mattr=-xnack -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A %s
target triple = "amdgcn-amd-amdhsa"
-; GCN-LABEL: {{^}}use_workitem_id_x:
-; GCN: s_waitcnt
-; GCN: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v31
-; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_x() #1 {
+; GFX7-LABEL: use_workitem_id_x:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_x:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
%val = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %val, ptr addrspace(1) poison
ret void
}
-; GCN-LABEL: {{^}}use_workitem_id_y:
-; GCN: s_waitcnt
-; GCN: v_bfe_u32 [[ID:v[0-9]+]], v31, 10, 10
-; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_y() #1 {
+; GFX7-LABEL: use_workitem_id_y:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_bfe_u32 v0, v31, 10, 10
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_y:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_bfe_u32 v0, v31, 10, 10
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
%val = call i32 @llvm.amdgcn.workitem.id.y()
store volatile i32 %val, ptr addrspace(1) poison
ret void
}
-; GCN-LABEL: {{^}}use_workitem_id_z:
-; GCN: s_waitcnt
-; GCN: v_bfe_u32 [[ID:v[0-9]+]], v31, 20, 10
-; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_z() #1 {
+; GFX7-LABEL: use_workitem_id_z:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_bfe_u32 v0, v31, 20, 10
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_z:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_bfe_u32 v0, v31, 20, 10
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
%val = call i32 @llvm.amdgcn.workitem.id.z()
store volatile i32 %val, ptr addrspace(1) poison
ret void
}
-; GCN-LABEL: {{^}}use_workitem_id_xy:
-; GCN: s_waitcnt
-; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
-; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xy() #1 {
+; GFX7-LABEL: use_workitem_id_xy:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_bfe_u32 v0, v31, 10, 10
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_xy:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: v_bfe_u32 v0, v31, 10, 10
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workitem.id.y()
store volatile i32 %val0, ptr addrspace(1) poison
@@ -55,17 +96,34 @@ define void @use_workitem_id_xy() #1 {
ret void
}
-; GCN-LABEL: {{^}}use_workitem_id_xyz:
-; GCN: s_waitcnt
-; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
-; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
-; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xyz() #1 {
+; GFX7-LABEL: use_workitem_id_xyz:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_bfe_u32 v0, v31, 10, 10
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_bfe_u32 v0, v31, 20, 10
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_xyz:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: v_bfe_u32 v0, v31, 10, 10
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: v_bfe_u32 v0, v31, 20, 10
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workitem.id.y()
%val2 = call i32 @llvm.amdgcn.workitem.id.z()
@@ -75,15 +133,28 @@ define void @use_workitem_id_xyz() #1 {
ret void
}
-; GCN-LABEL: {{^}}use_workitem_id_xz:
-; GCN: s_waitcnt
-; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
-; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xz() #1 {
+; GFX7-LABEL: use_workitem_id_xz:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_bfe_u32 v0, v31, 20, 10
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_xz:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: v_bfe_u32 v0, v31, 20, 10
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workitem.id.z()
store volatile i32 %val0, ptr addrspace(1) poison
@@ -91,15 +162,28 @@ define void @use_workitem_id_xz() #1 {
ret void
}
-; GCN-LABEL: {{^}}use_workitem_id_yz:
-; GCN: s_waitcnt
-; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
-; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_yz() #1 {
+; GFX7-LABEL: use_workitem_id_yz:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_bfe_u32 v0, v31, 10, 10
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_bfe_u32 v0, v31, 20, 10
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_yz:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_bfe_u32 v0, v31, 10, 10
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: v_bfe_u32 v0, v31, 20, 10
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
%val0 = call i32 @llvm.amdgcn.workitem.id.y()
%val1 = call i32 @llvm.amdgcn.workitem.id.z()
store volatile i32 %val0, ptr addrspace(1) poison
@@ -107,229 +191,639 @@ define void @use_workitem_id_yz() #1 {
ret void
}
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_x:
-; GCN: v_mov_b32_e32 v31, v0
-; GCN: s_swappc_b64
-; GCN-NOT: v31
-
-; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
+; GCN-LABEL: kern_indirect_use_workitem_id_x:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_add_u32 s0, s0, s5
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_x@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_x@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_mov_b32_e32 v31, v0
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_endpgm
call void @use_workitem_id_x()
ret void
}
+; GCN: .amdhsa_system_vgpr_workitem_id 0
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y:
-
-; GCN-NOT: v0
-; GCN-NOT: v1
-; GCN-NOT: v31
-; PACKED-TID: v_mov_b32_e32 v31, v0
-; UNPACKED-TID: v_lshlrev_b32_e32 v31, 10, v1
-; GCN-NOT: v0
-; GCN-NOT: v1
-; GCN: s_swappc_b64
-
-; GCN: .amdhsa_system_vgpr_workitem_id 1
define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
+; GFX7-LABEL: kern_indirect_use_workitem_id_y:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_add_u32 s0, s0, s5
+; GFX7-NEXT: s_addc_u32 s1, s1, 0
+; GFX7-NEXT: s_getpc_b64 s[4:5]
+; GFX7-NEXT: s_add_u32 s4, s4, use_workitem_id_y@gotpcrel32@lo+4
+; GFX7-NEXT: s_addc_u32 s5, s5, use_workitem_id_y@gotpcrel32@hi+12
+; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT: v_lshlrev_b32_e32 v31, 10, v1
+; GFX7-NEXT: s_mov_b32 s32, 0
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT: s_endpgm
+;
+; GFX90A-LABEL: kern_indirect_use_workitem_id_y:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_add_u32 s0, s0, s5
+; GFX90A-NEXT: s_addc_u32 s1, s1, 0
+; GFX90A-NEXT: s_getpc_b64 s[4:5]
+; GFX90A-NEXT: s_add_u32 s4, s4, use_workitem_id_y@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s5, s5, use_workitem_id_y@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT: v_mov_b32_e32 v31, v0
+; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT: s_endpgm
call void @use_workitem_id_y()
ret void
}
+; GCN: .amdhsa_system_vgpr_workitem_id 1
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z:
-
-; GCN-NOT: v0
-; GCN-NOT: v2
-; GCN-NOT: v31
-; PACKED-TID: v_mov_b32_e32 v31, v0
-; UNPACKED-TID: v_lshlrev_b32_e32 v31, 20, v2
-; GCN-NOT: v0
-; GCN-NOT: v1
-; GCN: s_swappc_b64
-
-; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
+; GFX7-LABEL: kern_indirect_use_workitem_id_z:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_add_u32 s0, s0, s5
+; GFX7-NEXT: s_addc_u32 s1, s1, 0
+; GFX7-NEXT: s_getpc_b64 s[4:5]
+; GFX7-NEXT: s_add_u32 s4, s4, use_workitem_id_z@gotpcrel32@lo+4
+; GFX7-NEXT: s_addc_u32 s5, s5, use_workitem_id_z@gotpcrel32@hi+12
+; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT: v_lshlrev_b32_e32 v31, 20, v2
+; GFX7-NEXT: s_mov_b32 s32, 0
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT: s_endpgm
+;
+; GFX90A-LABEL: kern_indirect_use_workitem_id_z:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_add_u32 s0, s0, s5
+; GFX90A-NEXT: s_addc_u32 s1, s1, 0
+; GFX90A-NEXT: s_getpc_b64 s[4:5]
+; GFX90A-NEXT: s_add_u32 s4, s4, use_workitem_id_z@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s5, s5, use_workitem_id_z@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT: v_mov_b32_e32 v31, v0
+; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT: s_endpgm
call void @use_workitem_id_z()
ret void
}
+; GCN: .amdhsa_system_vgpr_workitem_id 2
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xy:
-; GCN-NOT: v0
-; GCN-NOT: v1
-; PACKED-TID: v_mov_b32_e32 v31, v0
-; UNPACKED-TID: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
-; UNPACKED-TID: v_or_b32_e32 v31, v0, [[IDY]]
-; GCN-NOT: v0
-; GCN-NOT: v1
-; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
+; GFX7-LABEL: kern_indirect_use_workitem_id_xy:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_add_u32 s0, s0, s5
+; GFX7-NEXT: s_addc_u32 s1, s1, 0
+; GFX7-NEXT: s_getpc_b64 s[4:5]
+; GFX7-NEXT: s_add_u32 s4, s4, use_workitem_id_xy@gotpcrel32@lo+4
+; GFX7-NEXT: s_addc_u32 s5, s5, use_workitem_id_xy@gotpcrel32@hi+12
+; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX7-NEXT: v_or_b32_e32 v31, v0, v1
+; GFX7-NEXT: s_mov_b32 s32, 0
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT: s_endpgm
+;
+; GFX90A-LABEL: kern_indirect_use_workitem_id_xy:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_add_u32 s0, s0, s5
+; GFX90A-NEXT: s_addc_u32 s1, s1, 0
+; GFX90A-NEXT: s_getpc_b64 s[4:5]
+; GFX90A-NEXT: s_add_u32 s4, s4, use_workitem_id_xy@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s5, s5, use_workitem_id_xy@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT: v_mov_b32_e32 v31, v0
+; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT: s_endpgm
call void @use_workitem_id_xy()
ret void
}
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xz:
-; GCN-NOT: v0
-; GCN-NOT: v2
-
-; PACKED-TID: v_mov_b32_e32 v31, v0
-; UNPACKED-TID: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
-; UNPACKED-TID: v_or_b32_e32 v31, v0, [[IDZ]]
-; GCN-NOT: v0
-; GCN-NOT: v2
-; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
+; GFX7-LABEL: kern_indirect_use_workitem_id_xz:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_add_u32 s0, s0, s5
+; GFX7-NEXT: s_addc_u32 s1, s1, 0
+; GFX7-NEXT: s_getpc_b64 s[4:5]
+; GFX7-NEXT: s_add_u32 s4, s4, use_workitem_id_xz@gotpcrel32@lo+4
+; GFX7-NEXT: s_addc_u32 s5, s5, use_workitem_id_xz@gotpcrel32@hi+12
+; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT: v_lshlrev_b32_e32 v1, 20, v2
+; GFX7-NEXT: v_or_b32_e32 v31, v0, v1
+; GFX7-NEXT: s_mov_b32 s32, 0
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT: s_endpgm
+;
+; GFX90A-LABEL: kern_indirect_use_workitem_id_xz:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_add_u32 s0, s0, s5
+; GFX90A-NEXT: s_addc_u32 s1, s1, 0
+; GFX90A-NEXT: s_getpc_b64 s[4:5]
+; GFX90A-NEXT: s_add_u32 s4, s4, use_workitem_id_xz@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s5, s5, use_workitem_id_xz@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT: v_mov_b32_e32 v31, v0
+; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT: s_endpgm
call void @use_workitem_id_xz()
ret void
}
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_yz:
-; GCN-NOT: v1
-; GCN-NOT: v2
-; PACKED-TID: v_mov_b32_e32 v31, v0
-; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
-; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
-; UNPACKED-TID: v_or_b32_e32 v31, [[IDY]], [[IDZ]]
-; GCN-NOT: v1
-; GCN-NOT: v2
-; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 {
+; GFX7-LABEL: kern_indirect_use_workitem_id_yz:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_add_u32 s0, s0, s5
+; GFX7-NEXT: s_addc_u32 s1, s1, 0
+; GFX7-NEXT: s_getpc_b64 s[4:5]
+; GFX7-NEXT: s_add_u32 s4, s4, use_workitem_id_yz@gotpcrel32@lo+4
+; GFX7-NEXT: s_addc_u32 s5, s5, use_workitem_id_yz@gotpcrel32@hi+12
+; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT: v_lshlrev_b32_e32 v0, 20, v2
+; GFX7-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX7-NEXT: v_or_b32_e32 v31, v1, v0
+; GFX7-NEXT: s_mov_b32 s32, 0
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT: s_endpgm
+;
+; GFX90A-LABEL: kern_indirect_use_workitem_id_yz:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_add_u32 s0, s0, s5
+; GFX90A-NEXT: s_addc_u32 s1, s1, 0
+; GFX90A-NEXT: s_getpc_b64 s[4:5]
+; GFX90A-NEXT: s_add_u32 s4, s4, use_workitem_id_yz@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s5, s5, use_workitem_id_yz@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT: v_mov_b32_e32 v31, v0
+; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT: s_endpgm
call void @use_workitem_id_yz()
ret void
}
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xyz:
-; GCN-NOT: v0
-; GCN-NOT: v1
-; GCN-NOT: v2
-
-; PACKED-TID: v_mov_b32_e32 v31, v0
-
-; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
-; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
-; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, [[IDY]]
-; UNPACKED-TID-DAG: v_or_b32_e32 v31, v0, [[IDZ]]
-; GCN-NOT: v0
-; GCN-NOT: v1
-; GCN-NOT: v2
-; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_xyz() #1 {
+; GFX7-LABEL: kern_indirect_use_workitem_id_xyz:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_add_u32 s0, s0, s5
+; GFX7-NEXT: s_addc_u32 s1, s1, 0
+; GFX7-NEXT: s_getpc_b64 s[4:5]
+; GFX7-NEXT: s_add_u32 s4, s4, use_workitem_id_xyz@gotpcrel32@lo+4
+; GFX7-NEXT: s_addc_u32 s5, s5, use_workitem_id_xyz@gotpcrel32@hi+12
+; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX7-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7-NEXT: s_mov_b32 s32, 0
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT: s_endpgm
+;
+; GFX90A-LABEL: kern_indirect_use_workitem_id_xyz:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_add_u32 s0, s0, s5
+; GFX90A-NEXT: s_addc_u32 s1, s1, 0
+; GFX90A-NEXT: s_getpc_b64 s[4:5]
+; GFX90A-NEXT: s_add_u32 s4, s4, use_workitem_id_xyz@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s5, s5, use_workitem_id_xyz@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT: v_mov_b32_e32 v31, v0
+; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT: s_endpgm
call void @use_workitem_id_xyz()
ret void
}
-; GCN-LABEL: {{^}}func_indirect_use_workitem_id_x:
-; GCN-NOT: v0
-; GCN: s_swappc_b64
-; GCN-NOT: v0
define void @func_indirect_use_workitem_id_x() #1 {
+; GCN-LABEL: func_indirect_use_workitem_id_x:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s33
+; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_addk_i32 s32, 0x400
+; GCN-NEXT: v_writelane_b32 v40, s4, 2
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_x@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_x@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_writelane_b32 v40, s30, 0
+; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
+; GCN-NEXT: s_mov_b32 s32, s33
+; GCN-NEXT: v_readlane_b32 s4, v40, 2
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b32 s33, s4
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
call void @use_workitem_id_x()
ret void
}
-; GCN-LABEL: {{^}}func_indirect_use_workitem_id_y:
-; GCN-NOT: v0
-; GCN: s_swappc_b64
-; GCN-NOT: v0
define void @func_indirect_use_workitem_id_y() #1 {
+; GCN-LABEL: func_indirect_use_workitem_id_y:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s33
+; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_addk_i32 s32, 0x400
+; GCN-NEXT: v_writelane_b32 v40, s4, 2
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_y@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_y@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_writelane_b32 v40, s30, 0
+; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
+; GCN-NEXT: s_mov_b32 s32, s33
+; GCN-NEXT: v_readlane_b32 s4, v40, 2
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b32 s33, s4
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
call void @use_workitem_id_y()
ret void
}
-; GCN-LABEL: {{^}}func_indirect_use_workitem_id_z:
-; GCN-NOT: v0
-; GCN: s_swappc_b64
-; GCN-NOT: v0
define void @func_indirect_use_workitem_id_z() #1 {
+; GCN-LABEL: func_indirect_use_workitem_id_z:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s33
+; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_addk_i32 s32, 0x400
+; GCN-NEXT: v_writelane_b32 v40, s4, 2
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_z@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_z@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_writelane_b32 v40, s30, 0
+; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
+; GCN-NEXT: s_mov_b32 s32, s33
+; GCN-NEXT: v_readlane_b32 s4, v40, 2
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b32 s33, s4
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
call void @use_workitem_id_z()
ret void
}
-; GCN-LABEL: {{^}}other_arg_use_workitem_id_x:
-; GCN: s_waitcnt
-; GCN-DAG: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v31
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
define void @other_arg_use_workitem_id_x(i32 %arg0) #1 {
+; GFX7-LABEL: other_arg_use_workitem_id_x:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: other_arg_use_workitem_id_x:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
%val = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %arg0, ptr addrspace(1) poison
store volatile i32 %val, ptr addrspace(1) poison
ret void
}
-; GCN-LABEL: {{^}}other_arg_use_workitem_id_y:
-; GCN: s_waitcnt
-; GCN-DAG: v_bfe_u32 [[ID:v[0-9]+]], v31, 10, 10
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
define void @other_arg_use_workitem_id_y(i32 %arg0) #1 {
+; GFX7-LABEL: other_arg_use_workitem_id_y:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_bfe_u32 v0, v31, 10, 10
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: other_arg_use_workitem_id_y:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: v_bfe_u32 v0, v31, 10, 10
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
%val = call i32 @llvm.amdgcn.workitem.id.y()
store volatile i32 %arg0, ptr addrspace(1) poison
store volatile i32 %val, ptr addrspace(1) poison
ret void
}
-; GCN-LABEL: {{^}}other_arg_use_workitem_id_z:
-; GCN: s_waitcnt
-; GCN-DAG: v_bfe_u32 [[ID:v[0-9]+]], v31, 20, 10
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
+; GFX7-LABEL: other_arg_use_workitem_id_z:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_bfe_u32 v0, v31, 20, 10
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: other_arg_use_workitem_id_z:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: v_bfe_u32 v0, v31, 20, 10
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
%val = call i32 @llvm.amdgcn.workitem.id.z()
store volatile i32 %arg0, ptr addrspace(1) poison
store volatile i32 %val, ptr addrspace(1) poison
ret void
}
-
-; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_x:
-
-; GCN: v_mov_b32_e32 v31, v0
-; GCN: v_mov_b32_e32 v0, 0x22b
-; GCN: s_swappc_b64
-
-; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
+; GCN-LABEL: kern_indirect_other_arg_use_workitem_id_x:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_add_u32 s0, s0, s5
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, other_arg_use_workitem_id_x@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, other_arg_use_workitem_id_x@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_mov_b32_e32 v31, v0
+; GCN-NEXT: v_mov_b32_e32 v0, 0x22b
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_endpgm
call void @other_arg_use_workitem_id_x(i32 555)
ret void
}
+; GCN: .amdhsa_system_vgpr_workitem_id 0
-
-; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y:
-
-; UNPACKED-TID: v_lshlrev_b32_e32 v31, 10, v1
-; PACKED-TID: v_mov_b32_e32 v31, v0
-; GCN-NOT: v1
-; GCN: v_mov_b32_e32 v0, 0x22b
-; GCN-NOT: v1
-; GCN: s_swappc_b64
-; GCN-NOT: v0
-
-; GCN: .amdhsa_system_vgpr_workitem_id 1
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
+; GFX7-LABEL: kern_indirect_other_arg_use_workitem_id_y:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_add_u32 s0, s0, s5
+; GFX7-NEXT: s_addc_u32 s1, s1, 0
+; GFX7-NEXT: s_getpc_b64 s[4:5]
+; GFX7-NEXT: s_add_u32 s4, s4, other_arg_use_workitem_id_y@gotpcrel32@lo+4
+; GFX7-NEXT: s_addc_u32 s5, s5, other_arg_use_workitem_id_y@gotpcrel32@hi+12
+; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT: v_lshlrev_b32_e32 v31, 10, v1
+; GFX7-NEXT: v_mov_b32_e32 v0, 0x22b
+; GFX7-NEXT: s_mov_b32 s32, 0
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT: s_endpgm
+;
+; GFX90A-LABEL: kern_indirect_other_arg_use_workitem_id_y:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_add_u32 s0, s0, s5
+; GFX90A-NEXT: s_addc_u32 s1, s1, 0
+; GFX90A-NEXT: s_getpc_b64 s[4:5]
+; GFX90A-NEXT: s_add_u32 s4, s4, other_arg_use_workitem_id_y@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s5, s5, other_arg_use_workitem_id_y@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT: v_mov_b32_e32 v31, v0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0x22b
+; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT: s_endpgm
call void @other_arg_use_workitem_id_y(i32 555)
ret void
}
+; GCN: .amdhsa_system_vgpr_workitem_id 1
-; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z:
-
-; GCN-DAG: v_mov_b32_e32 v0, 0x22b
-; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v31, 20, v2
-; PACKED-TID-DAG: v_mov_b32_e32 v31, v0
-; GCN: s_swappc_b64
-; GCN-NOT: v0
-
-; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
+; GFX7-LABEL: kern_indirect_other_arg_use_workitem_id_z:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_add_u32 s0, s0, s5
+; GFX7-NEXT: s_addc_u32 s1, s1, 0
+; GFX7-NEXT: s_getpc_b64 s[4:5]
+; GFX7-NEXT: s_add_u32 s4, s4, other_arg_use_workitem_id_z@gotpcrel32@lo+4
+; GFX7-NEXT: s_addc_u32 s5, s5, other_arg_use_workitem_id_z@gotpcrel32@hi+12
+; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT: v_lshlrev_b32_e32 v31, 20, v2
+; GFX7-NEXT: v_mov_b32_e32 v0, 0x22b
+; GFX7-NEXT: s_mov_b32 s32, 0
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT: s_endpgm
+;
+; GFX90A-LABEL: kern_indirect_other_arg_use_workitem_id_z:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_add_u32 s0, s0, s5
+; GFX90A-NEXT: s_addc_u32 s1, s1, 0
+; GFX90A-NEXT: s_getpc_b64 s[4:5]
+; GFX90A-NEXT: s_add_u32 s4, s4, other_arg_use_workitem_id_z@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s5, s5, other_arg_use_workitem_id_z@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT: v_mov_b32_e32 v31, v0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0x22b
+; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT: s_endpgm
call void @other_arg_use_workitem_id_z(i32 555)
ret void
}
+; GCN: .amdhsa_system_vgpr_workitem_id 2
-; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x:
-; GCN-DAG: v_and_b32_e32 v31, 0x3ff, v31
-; GCN-DAG: buffer_load_dword [[LOAD_ARG31:v[0-9]+]], off, s[0:3], s32{{$}}
-; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[LOAD_ARG31]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @too_many_args_use_workitem_id_x(
+; GFX7-LABEL: too_many_args_use_workitem_id_x:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_and_b32_e32 v31, 0x3ff, v31
+; GFX7-NEXT: flat_store_dword v[0:1], v31
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v1
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v2
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v3
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v4
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v5
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v6
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v7
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v8
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v9
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v10
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v11
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v12
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v13
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v14
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v15
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v16
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v17
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v18
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v19
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v20
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v21
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v22
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v23
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v24
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v25
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v26
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v27
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v28
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v29
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v30
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v31
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: too_many_args_use_workitem_id_x:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: buffer_load_dword v32, off, s[0:3], s32
+; GFX90A-NEXT: v_and_b32_e32 v31, 0x3ff, v31
+; GFX90A-NEXT: global_store_dword v[0:1], v31, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v1, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v2, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v3, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v4, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v5, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v6, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v7, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v8, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v9, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v10, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v11, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v12, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v13, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v14, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v15, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v16, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v17, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v18, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v19, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v20, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v21, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v22, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v23, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v24, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v25, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v26, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v27, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v28, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v29, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v30, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v32, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
@@ -376,15 +870,53 @@ define void @too_many_args_use_workitem_id_x(
ret void
}
-; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x:
-
-; GCN: s_mov_b32 s32, 0
-; GCN: buffer_store_dword v1, off, s[0:3], s32{{$}}
-; GCN: v_mov_b32_e32 v31, v0
-; GCN: s_swappc_b64
-
-; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
+; GCN-LABEL: kern_call_too_many_args_use_workitem_id_x:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_add_u32 s0, s0, s5
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: v_mov_b32_e32 v1, 0x140
+; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32
+; GCN-NEXT: v_mov_b32_e32 v31, v0
+; GCN-NEXT: v_mov_b32_e32 v0, 10
+; GCN-NEXT: v_mov_b32_e32 v1, 20
+; GCN-NEXT: v_mov_b32_e32 v2, 30
+; GCN-NEXT: v_mov_b32_e32 v3, 40
+; GCN-NEXT: v_mov_b32_e32 v4, 50
+; GCN-NEXT: v_mov_b32_e32 v5, 60
+; GCN-NEXT: v_mov_b32_e32 v6, 0x46
+; GCN-NEXT: v_mov_b32_e32 v7, 0x50
+; GCN-NEXT: v_mov_b32_e32 v8, 0x5a
+; GCN-NEXT: v_mov_b32_e32 v9, 0x64
+; GCN-NEXT: v_mov_b32_e32 v10, 0x6e
+; GCN-NEXT: v_mov_b32_e32 v11, 0x78
+; GCN-NEXT: v_mov_b32_e32 v12, 0x82
+; GCN-NEXT: v_mov_b32_e32 v13, 0x8c
+; GCN-NEXT: v_mov_b32_e32 v14, 0x96
+; GCN-NEXT: v_mov_b32_e32 v15, 0xa0
+; GCN-NEXT: v_mov_b32_e32 v16, 0xaa
+; GCN-NEXT: v_mov_b32_e32 v17, 0xb4
+; GCN-NEXT: v_mov_b32_e32 v18, 0xbe
+; GCN-NEXT: v_mov_b32_e32 v19, 0xc8
+; GCN-NEXT: v_mov_b32_e32 v20, 0xd2
+; GCN-NEXT: v_mov_b32_e32 v21, 0xdc
+; GCN-NEXT: v_mov_b32_e32 v22, 0xe6
+; GCN-NEXT: v_mov_b32_e32 v23, 0xf0
+; GCN-NEXT: v_mov_b32_e32 v24, 0xfa
+; GCN-NEXT: v_mov_b32_e32 v25, 0x104
+; GCN-NEXT: v_mov_b32_e32 v26, 0x10e
+; GCN-NEXT: v_mov_b32_e32 v27, 0x118
+; GCN-NEXT: v_mov_b32_e32 v28, 0x122
+; GCN-NEXT: v_mov_b32_e32 v29, 0x12c
+; GCN-NEXT: v_mov_b32_e32 v30, 0x136
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_endpgm
call void @too_many_args_use_workitem_id_x(
i32 10, i32 20, i32 30, i32 40,
i32 50, i32 60, i32 70, i32 80,
@@ -396,15 +928,136 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
i32 290, i32 300, i32 310, i32 320)
ret void
}
+; GCN: .amdhsa_system_vgpr_workitem_id 0
-; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x:
-; GCN-NOT: v31
-; GCN: s_mov_b32 s33, s32
-; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
-; GCN-NOT: v31
-; GCN: s_swappc_b64
-; GCN-NOT: v31
define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
+; GFX7-LABEL: func_call_too_many_args_use_workitem_id_x:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_mov_b32 s4, s33
+; GFX7-NEXT: s_mov_b32 s33, s32
+; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX7-NEXT: s_mov_b64 exec, s[6:7]
+; GFX7-NEXT: s_addk_i32 s32, 0x400
+; GFX7-NEXT: v_writelane_b32 v40, s4, 2
+; GFX7-NEXT: s_getpc_b64 s[4:5]
+; GFX7-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4
+; GFX7-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12
+; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v0, 0x140
+; GFX7-NEXT: v_writelane_b32 v40, s30, 0
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s32
+; GFX7-NEXT: v_mov_b32_e32 v0, 10
+; GFX7-NEXT: v_mov_b32_e32 v1, 20
+; GFX7-NEXT: v_mov_b32_e32 v2, 30
+; GFX7-NEXT: v_mov_b32_e32 v3, 40
+; GFX7-NEXT: v_mov_b32_e32 v4, 50
+; GFX7-NEXT: v_mov_b32_e32 v5, 60
+; GFX7-NEXT: v_mov_b32_e32 v6, 0x46
+; GFX7-NEXT: v_mov_b32_e32 v7, 0x50
+; GFX7-NEXT: v_mov_b32_e32 v8, 0x5a
+; GFX7-NEXT: v_mov_b32_e32 v9, 0x64
+; GFX7-NEXT: v_mov_b32_e32 v10, 0x6e
+; GFX7-NEXT: v_mov_b32_e32 v11, 0x78
+; GFX7-NEXT: v_mov_b32_e32 v12, 0x82
+; GFX7-NEXT: v_mov_b32_e32 v13, 0x8c
+; GFX7-NEXT: v_mov_b32_e32 v14, 0x96
+; GFX7-NEXT: v_mov_b32_e32 v15, 0xa0
+; GFX7-NEXT: v_mov_b32_e32 v16, 0xaa
+; GFX7-NEXT: v_mov_b32_e32 v17, 0xb4
+; GFX7-NEXT: v_mov_b32_e32 v18, 0xbe
+; GFX7-NEXT: v_mov_b32_e32 v19, 0xc8
+; GFX7-NEXT: v_mov_b32_e32 v20, 0xd2
+; GFX7-NEXT: v_mov_b32_e32 v21, 0xdc
+; GFX7-NEXT: v_mov_b32_e32 v22, 0xe6
+; GFX7-NEXT: v_mov_b32_e32 v23, 0xf0
+; GFX7-NEXT: v_mov_b32_e32 v24, 0xfa
+; GFX7-NEXT: v_mov_b32_e32 v25, 0x104
+; GFX7-NEXT: v_mov_b32_e32 v26, 0x10e
+; GFX7-NEXT: v_mov_b32_e32 v27, 0x118
+; GFX7-NEXT: v_mov_b32_e32 v28, 0x122
+; GFX7-NEXT: v_mov_b32_e32 v29, 0x12c
+; GFX7-NEXT: v_mov_b32_e32 v30, 0x136
+; GFX7-NEXT: v_writelane_b32 v40, s31, 1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT: v_readlane_b32 s31, v40, 1
+; GFX7-NEXT: v_readlane_b32 s30, v40, 0
+; GFX7-NEXT: s_mov_b32 s32, s33
+; GFX7-NEXT: v_readlane_b32 s4, v40, 2
+; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX7-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX7-NEXT: s_mov_b64 exec, s[6:7]
+; GFX7-NEXT: s_mov_b32 s33, s4
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: func_call_too_many_args_use_workitem_id_x:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: s_mov_b32 s4, s33
+; GFX90A-NEXT: s_mov_b32 s33, s32
+; GFX90A-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX90A-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX90A-NEXT: s_mov_b64 exec, s[6:7]
+; GFX90A-NEXT: s_addk_i32 s32, 0x400
+; GFX90A-NEXT: v_writelane_b32 v40, s4, 2
+; GFX90A-NEXT: s_getpc_b64 s[4:5]
+; GFX90A-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0x140
+; GFX90A-NEXT: v_writelane_b32 v40, s30, 0
+; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32
+; GFX90A-NEXT: v_mov_b32_e32 v0, 10
+; GFX90A-NEXT: v_mov_b32_e32 v1, 20
+; GFX90A-NEXT: v_mov_b32_e32 v2, 30
+; GFX90A-NEXT: v_mov_b32_e32 v3, 40
+; GFX90A-NEXT: v_mov_b32_e32 v4, 50
+; GFX90A-NEXT: v_mov_b32_e32 v5, 60
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0x46
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0x50
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0x5a
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0x64
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0x6e
+; GFX90A-NEXT: v_mov_b32_e32 v11, 0x78
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0x82
+; GFX90A-NEXT: v_mov_b32_e32 v13, 0x8c
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0x96
+; GFX90A-NEXT: v_mov_b32_e32 v15, 0xa0
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0xaa
+; GFX90A-NEXT: v_mov_b32_e32 v17, 0xb4
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0xbe
+; GFX90A-NEXT: v_mov_b32_e32 v19, 0xc8
+; GFX90A-NEXT: v_mov_b32_e32 v20, 0xd2
+; GFX90A-NEXT: v_mov_b32_e32 v21, 0xdc
+; GFX90A-NEXT: v_mov_b32_e32 v22, 0xe6
+; GFX90A-NEXT: v_mov_b32_e32 v23, 0xf0
+; GFX90A-NEXT: v_mov_b32_e32 v24, 0xfa
+; GFX90A-NEXT: v_mov_b32_e32 v25, 0x104
+; GFX90A-NEXT: v_mov_b32_e32 v26, 0x10e
+; GFX90A-NEXT: v_mov_b32_e32 v27, 0x118
+; GFX90A-NEXT: v_mov_b32_e32 v28, 0x122
+; GFX90A-NEXT: v_mov_b32_e32 v29, 0x12c
+; GFX90A-NEXT: v_mov_b32_e32 v30, 0x136
+; GFX90A-NEXT: v_writelane_b32 v40, s31, 1
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT: v_readlane_b32 s31, v40, 1
+; GFX90A-NEXT: v_readlane_b32 s30, v40, 0
+; GFX90A-NEXT: s_mov_b32 s32, s33
+; GFX90A-NEXT: v_readlane_b32 s4, v40, 2
+; GFX90A-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX90A-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX90A-NEXT: s_mov_b64 exec, s[6:7]
+; GFX90A-NEXT: s_mov_b32 s33, s4
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
store volatile i32 %arg0, ptr addrspace(1) poison
call void @too_many_args_use_workitem_id_x(
i32 10, i32 20, i32 30, i32 40,
@@ -419,19 +1072,38 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
}
; Requires loading and storing to stack slot.
-; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
-; GCN-DAG: s_addk_i32 s32, 0x400{{$}}
-; GCN-DAG: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; GCN-DAG: buffer_load_dword [[TMP_REG:v[0-9]+]], off, s[0:3], s33{{$}}
-
-; GCN: buffer_store_dword [[TMP_REG]], off, s[0:3], s32{{$}}
-
-; GCN: s_swappc_b64
-
-; GCN: s_mov_b32 s32, s33
-; GCN: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GCN: s_setpc_b64
define void @too_many_args_call_too_many_args_use_workitem_id_x(
+; GCN-LABEL: too_many_args_call_too_many_args_use_workitem_id_x:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s33
+; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s33
+; GCN-NEXT: s_addk_i32 s32, 0x400
+; GCN-NEXT: v_writelane_b32 v40, s4, 2
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_writelane_b32 v40, s30, 0
+; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
+; GCN-NEXT: s_mov_b32 s32, s33
+; GCN-NEXT: v_readlane_b32 s4, v40, 2
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b32 s33, s4
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
@@ -447,16 +1119,156 @@ define void @too_many_args_call_too_many_args_use_workitem_id_x(
; stack layout:
; frame[0] = stack passed arg23
; frame[1] = byval arg32
-
-; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_byval:
-; GCN-DAG: v_and_b32_e32 v31, 0x3ff, v31
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v31
-; GCN-DAG: buffer_load_dword [[LOAD_ARG31:v[0-9]+]], off, s[0:3], s32{{$}}
-; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[LOAD_ARG31]]
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4 glc
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @too_many_args_use_workitem_id_x_byval(
+; GFX7-LABEL: too_many_args_use_workitem_id_x_byval:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_and_b32_e32 v31, 0x3ff, v31
+; GFX7-NEXT: flat_store_dword v[0:1], v31
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v1
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v2
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v3
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v4
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v5
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v6
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v7
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v8
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v9
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v10
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v11
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v12
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v13
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v14
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v15
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v16
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v17
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v18
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v19
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v20
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v21
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v22
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v23
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v24
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v25
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v26
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v27
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v28
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v29
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v30
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v31
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 glc
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: too_many_args_use_workitem_id_x_byval:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: buffer_load_dword v32, off, s[0:3], s32
+; GFX90A-NEXT: v_and_b32_e32 v31, 0x3ff, v31
+; GFX90A-NEXT: global_store_dword v[0:1], v31, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v1, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v2, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v3, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v4, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v5, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v6, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v7, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v8, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v9, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v10, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v11, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v12, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v13, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v14, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v15, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v16, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v17, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v18, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v19, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v20, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v21, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v22, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v23, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v24, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v25, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v26, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v27, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v28, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v29, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v30, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v32, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 glc
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
@@ -505,25 +1317,60 @@ define void @too_many_args_use_workitem_id_x_byval(
; sp[0] = stack passed %arg31
; sp[1] = byval
-
-; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
-
; Local stack object initialize. Offset 0 is the emergency spill slot.
-; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
-; GCN-DAG: s_movk_i32 s32, 0x400
-; GCN: buffer_store_dword [[K]], off, s[0:3], 0
-
-; Pass %arg31 on stack
-; GCN: v_mov_b32_e32 [[K1:v[0-9]+]], 0x140{{$}}
-; GCN: buffer_store_dword [[K1:v[0-9]+]], off, s[0:3], s32{{$}}
-
-; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], 0
-; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
-; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
-; GCN: s_swappc_b64
-
-; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 {
+; GCN-LABEL: kern_call_too_many_args_use_workitem_id_x_byval:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_add_u32 s0, s0, s5
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: v_mov_b32_e32 v31, v0
+; GCN-NEXT: v_mov_b32_e32 v0, 0x3e7
+; GCN-NEXT: s_movk_i32 s32, 0x400
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, 0x140
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32
+; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x_byval@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x_byval@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_mov_b32_e32 v1, 20
+; GCN-NEXT: v_mov_b32_e32 v2, 30
+; GCN-NEXT: v_mov_b32_e32 v3, 40
+; GCN-NEXT: v_mov_b32_e32 v4, 50
+; GCN-NEXT: v_mov_b32_e32 v5, 60
+; GCN-NEXT: v_mov_b32_e32 v6, 0x46
+; GCN-NEXT: v_mov_b32_e32 v7, 0x50
+; GCN-NEXT: v_mov_b32_e32 v8, 0x5a
+; GCN-NEXT: v_mov_b32_e32 v9, 0x64
+; GCN-NEXT: v_mov_b32_e32 v10, 0x6e
+; GCN-NEXT: v_mov_b32_e32 v11, 0x78
+; GCN-NEXT: v_mov_b32_e32 v12, 0x82
+; GCN-NEXT: v_mov_b32_e32 v13, 0x8c
+; GCN-NEXT: v_mov_b32_e32 v14, 0x96
+; GCN-NEXT: v_mov_b32_e32 v15, 0xa0
+; GCN-NEXT: v_mov_b32_e32 v16, 0xaa
+; GCN-NEXT: v_mov_b32_e32 v17, 0xb4
+; GCN-NEXT: v_mov_b32_e32 v18, 0xbe
+; GCN-NEXT: v_mov_b32_e32 v19, 0xc8
+; GCN-NEXT: v_mov_b32_e32 v20, 0xd2
+; GCN-NEXT: v_mov_b32_e32 v21, 0xdc
+; GCN-NEXT: v_mov_b32_e32 v22, 0xe6
+; GCN-NEXT: v_mov_b32_e32 v23, 0xf0
+; GCN-NEXT: v_mov_b32_e32 v24, 0xfa
+; GCN-NEXT: v_mov_b32_e32 v25, 0x104
+; GCN-NEXT: v_mov_b32_e32 v26, 0x10e
+; GCN-NEXT: v_mov_b32_e32 v27, 0x118
+; GCN-NEXT: v_mov_b32_e32 v28, 0x122
+; GCN-NEXT: v_mov_b32_e32 v29, 0x12c
+; GCN-NEXT: v_mov_b32_e32 v30, 0x136
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
+; GCN-NEXT: v_mov_b32_e32 v0, 10
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_endpgm
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 999, ptr addrspace(5) %alloca
call void @too_many_args_use_workitem_id_x_byval(
@@ -538,16 +1385,76 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1
ptr addrspace(5) byval(i32) %alloca)
ret void
}
+; GCN: .amdhsa_system_vgpr_workitem_id 0
-; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
-; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
-; GFX7: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
-; GFX90A: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
-; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}}
-; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
-; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
-; GCN: s_swappc_b64
define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
+; GCN-LABEL: func_call_too_many_args_use_workitem_id_x_byval:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s33
+; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: v_mov_b32_e32 v0, 0x3e7
+; GCN-NEXT: s_addk_i32 s32, 0x400
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, 0x140
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32
+; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33
+; GCN-NEXT: v_writelane_b32 v40, s4, 2
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x_byval@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x_byval@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_writelane_b32 v40, s30, 0
+; GCN-NEXT: v_mov_b32_e32 v1, 20
+; GCN-NEXT: v_mov_b32_e32 v2, 30
+; GCN-NEXT: v_mov_b32_e32 v3, 40
+; GCN-NEXT: v_mov_b32_e32 v4, 50
+; GCN-NEXT: v_mov_b32_e32 v5, 60
+; GCN-NEXT: v_mov_b32_e32 v6, 0x46
+; GCN-NEXT: v_mov_b32_e32 v7, 0x50
+; GCN-NEXT: v_mov_b32_e32 v8, 0x5a
+; GCN-NEXT: v_mov_b32_e32 v9, 0x64
+; GCN-NEXT: v_mov_b32_e32 v10, 0x6e
+; GCN-NEXT: v_mov_b32_e32 v11, 0x78
+; GCN-NEXT: v_mov_b32_e32 v12, 0x82
+; GCN-NEXT: v_mov_b32_e32 v13, 0x8c
+; GCN-NEXT: v_mov_b32_e32 v14, 0x96
+; GCN-NEXT: v_mov_b32_e32 v15, 0xa0
+; GCN-NEXT: v_mov_b32_e32 v16, 0xaa
+; GCN-NEXT: v_mov_b32_e32 v17, 0xb4
+; GCN-NEXT: v_mov_b32_e32 v18, 0xbe
+; GCN-NEXT: v_mov_b32_e32 v19, 0xc8
+; GCN-NEXT: v_mov_b32_e32 v20, 0xd2
+; GCN-NEXT: v_mov_b32_e32 v21, 0xdc
+; GCN-NEXT: v_mov_b32_e32 v22, 0xe6
+; GCN-NEXT: v_mov_b32_e32 v23, 0xf0
+; GCN-NEXT: v_mov_b32_e32 v24, 0xfa
+; GCN-NEXT: v_mov_b32_e32 v25, 0x104
+; GCN-NEXT: v_mov_b32_e32 v26, 0x10e
+; GCN-NEXT: v_mov_b32_e32 v27, 0x118
+; GCN-NEXT: v_mov_b32_e32 v28, 0x122
+; GCN-NEXT: v_mov_b32_e32 v29, 0x12c
+; GCN-NEXT: v_mov_b32_e32 v30, 0x136
+; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
+; GCN-NEXT: v_mov_b32_e32 v0, 10
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
+; GCN-NEXT: s_mov_b32 s32, s33
+; GCN-NEXT: v_readlane_b32 s4, v40, 2
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b32 s33, s4
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 999, ptr addrspace(5) %alloca
call void @too_many_args_use_workitem_id_x_byval(
@@ -563,28 +1470,164 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
ret void
}
-; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz:
-; GFX90A: buffer_load_dword [[LOAD_ARG31:v[0-9]+]], off, s[0:3], s32{{$}}
-; GFX90A: v_and_b32_e32 [[ID_X:v[0-9]+]], 0x3ff, v31
-; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+]}}, [[ID_X]], off{{$}}
-; GFX90A: v_bfe_u32 [[ID_Y:v[0-9]+]], v31, 10, 10
-; GFX90A: v_bfe_u32 [[ID_Z:v[0-9]+]], v31, 20, 10
-; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+]}}, [[ID_Y]], off{{$}}
-; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+]}}, [[ID_Z]], off{{$}}
-
-; GFX7: v_and_b32_e32 v32, 0x3ff, v31
-; GFX7: v_bfe_u32 v32, v31, 10, 10
-; GCN7: v_bfe_u32 v31, v31, 20, 10
-; GFX7: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v32{{$}}
-; GFX7: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v31{{$}}
-; GFX7: buffer_load_dword [[LOAD_ARG31:v[0-9]+]], off, s[0:3], s32{{$}}
-
-; GFX7: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, [[LOAD_ARG31]]
-; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+]}}, [[LOAD_ARG31]]
-
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @too_many_args_use_workitem_id_xyz(
+; GFX7-LABEL: too_many_args_use_workitem_id_xyz:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_and_b32_e32 v32, 0x3ff, v31
+; GFX7-NEXT: flat_store_dword v[0:1], v32
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_bfe_u32 v32, v31, 10, 10
+; GFX7-NEXT: v_bfe_u32 v31, v31, 20, 10
+; GFX7-NEXT: flat_store_dword v[0:1], v32
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v31
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v1
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v2
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v3
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v4
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v5
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v6
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v7
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v8
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v9
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v10
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v11
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v12
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v13
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v14
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v15
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v16
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v17
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v18
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v19
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v20
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v21
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v22
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v23
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v24
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v25
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v26
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v27
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v28
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v29
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v30
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v31
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: too_many_args_use_workitem_id_xyz:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: buffer_load_dword v32, off, s[0:3], s32
+; GFX90A-NEXT: v_and_b32_e32 v33, 0x3ff, v31
+; GFX90A-NEXT: global_store_dword v[0:1], v33, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: v_bfe_u32 v33, v31, 10, 10
+; GFX90A-NEXT: v_bfe_u32 v31, v31, 20, 10
+; GFX90A-NEXT: global_store_dword v[0:1], v33, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v31, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v1, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v2, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v3, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v4, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v5, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v6, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v7, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v8, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v9, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v10, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v11, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v12, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v13, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v14, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v15, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v16, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v17, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v18, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v19, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v20, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v21, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v22, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v23, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v24, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v25, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v26, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v27, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v28, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v29, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v30, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v32, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
@@ -636,24 +1679,103 @@ define void @too_many_args_use_workitem_id_xyz(
}
; frame[0] = ID { Z, Y, X }
-
-; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_xyz:
-
-; GCN-DAG: s_mov_b32 s32, 0
-
-; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v1, 10, v1
-; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, v1
-; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v2, 20, v2
-; UNPACKED-TID-DAG: v_or_b32_e32 v31, v0, v2
-; PACKED-TID-NOT: v0
-; PACKED-TID-NOT: v1
-; PACKED-TID-NOT: v2
-; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x140
-; GCN-DAG: buffer_store_dword [[K]], off, s[0:3], s32{{$}}
-; GCN: s_swappc_b64
-
-; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
+; GFX7-LABEL: kern_call_too_many_args_use_workitem_id_xyz:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_add_u32 s0, s0, s5
+; GFX7-NEXT: s_addc_u32 s1, s1, 0
+; GFX7-NEXT: s_getpc_b64 s[4:5]
+; GFX7-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_xyz@gotpcrel32@lo+4
+; GFX7-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_xyz@gotpcrel32@hi+12
+; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX7-NEXT: s_mov_b32 s32, 0
+; GFX7-NEXT: v_mov_b32_e32 v3, 0x140
+; GFX7-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-NEXT: buffer_store_dword v3, off, s[0:3], s32
+; GFX7-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7-NEXT: v_mov_b32_e32 v0, 10
+; GFX7-NEXT: v_mov_b32_e32 v1, 20
+; GFX7-NEXT: v_mov_b32_e32 v2, 30
+; GFX7-NEXT: v_mov_b32_e32 v3, 40
+; GFX7-NEXT: v_mov_b32_e32 v4, 50
+; GFX7-NEXT: v_mov_b32_e32 v5, 60
+; GFX7-NEXT: v_mov_b32_e32 v6, 0x46
+; GFX7-NEXT: v_mov_b32_e32 v7, 0x50
+; GFX7-NEXT: v_mov_b32_e32 v8, 0x5a
+; GFX7-NEXT: v_mov_b32_e32 v9, 0x64
+; GFX7-NEXT: v_mov_b32_e32 v10, 0x6e
+; GFX7-NEXT: v_mov_b32_e32 v11, 0x78
+; GFX7-NEXT: v_mov_b32_e32 v12, 0x82
+; GFX7-NEXT: v_mov_b32_e32 v13, 0x8c
+; GFX7-NEXT: v_mov_b32_e32 v14, 0x96
+; GFX7-NEXT: v_mov_b32_e32 v15, 0xa0
+; GFX7-NEXT: v_mov_b32_e32 v16, 0xaa
+; GFX7-NEXT: v_mov_b32_e32 v17, 0xb4
+; GFX7-NEXT: v_mov_b32_e32 v18, 0xbe
+; GFX7-NEXT: v_mov_b32_e32 v19, 0xc8
+; GFX7-NEXT: v_mov_b32_e32 v20, 0xd2
+; GFX7-NEXT: v_mov_b32_e32 v21, 0xdc
+; GFX7-NEXT: v_mov_b32_e32 v22, 0xe6
+; GFX7-NEXT: v_mov_b32_e32 v23, 0xf0
+; GFX7-NEXT: v_mov_b32_e32 v24, 0xfa
+; GFX7-NEXT: v_mov_b32_e32 v25, 0x104
+; GFX7-NEXT: v_mov_b32_e32 v26, 0x10e
+; GFX7-NEXT: v_mov_b32_e32 v27, 0x118
+; GFX7-NEXT: v_mov_b32_e32 v28, 0x122
+; GFX7-NEXT: v_mov_b32_e32 v29, 0x12c
+; GFX7-NEXT: v_mov_b32_e32 v30, 0x136
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT: s_endpgm
+;
+; GFX90A-LABEL: kern_call_too_many_args_use_workitem_id_xyz:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_add_u32 s0, s0, s5
+; GFX90A-NEXT: s_addc_u32 s1, s1, 0
+; GFX90A-NEXT: s_getpc_b64 s[4:5]
+; GFX90A-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_xyz@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_xyz@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x140
+; GFX90A-NEXT: buffer_store_dword v1, off, s[0:3], s32
+; GFX90A-NEXT: v_mov_b32_e32 v31, v0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 10
+; GFX90A-NEXT: v_mov_b32_e32 v1, 20
+; GFX90A-NEXT: v_mov_b32_e32 v2, 30
+; GFX90A-NEXT: v_mov_b32_e32 v3, 40
+; GFX90A-NEXT: v_mov_b32_e32 v4, 50
+; GFX90A-NEXT: v_mov_b32_e32 v5, 60
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0x46
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0x50
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0x5a
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0x64
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0x6e
+; GFX90A-NEXT: v_mov_b32_e32 v11, 0x78
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0x82
+; GFX90A-NEXT: v_mov_b32_e32 v13, 0x8c
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0x96
+; GFX90A-NEXT: v_mov_b32_e32 v15, 0xa0
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0xaa
+; GFX90A-NEXT: v_mov_b32_e32 v17, 0xb4
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0xbe
+; GFX90A-NEXT: v_mov_b32_e32 v19, 0xc8
+; GFX90A-NEXT: v_mov_b32_e32 v20, 0xd2
+; GFX90A-NEXT: v_mov_b32_e32 v21, 0xdc
+; GFX90A-NEXT: v_mov_b32_e32 v22, 0xe6
+; GFX90A-NEXT: v_mov_b32_e32 v23, 0xf0
+; GFX90A-NEXT: v_mov_b32_e32 v24, 0xfa
+; GFX90A-NEXT: v_mov_b32_e32 v25, 0x104
+; GFX90A-NEXT: v_mov_b32_e32 v26, 0x10e
+; GFX90A-NEXT: v_mov_b32_e32 v27, 0x118
+; GFX90A-NEXT: v_mov_b32_e32 v28, 0x122
+; GFX90A-NEXT: v_mov_b32_e32 v29, 0x12c
+; GFX90A-NEXT: v_mov_b32_e32 v30, 0x136
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT: s_endpgm
call void @too_many_args_use_workitem_id_xyz(
i32 10, i32 20, i32 30, i32 40,
i32 50, i32 60, i32 70, i32 80,
@@ -665,24 +1787,163 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
i32 290, i32 300, i32 310, i32 320)
ret void
}
+; GCN: .amdhsa_system_vgpr_workitem_id 2
; workitem ID X in register, yz on stack
; v31 = workitem ID X
; frame[0] = workitem { Z, Y, X }
-
-; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_stack_yz:
-; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
-; GCN-DAG: {{flat|global}}_store_dword v[0:1], [[IDX]]
-; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[IDY]]
-; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[IDZ]]
-
-; GCN-COUNT-31: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}
-; GCN-NEXT: s_waitcnt
-; GCN: s_setpc_b64
-; GCN: ScratchSize: 0
define void @too_many_args_use_workitem_id_x_stack_yz(
+; GFX7-LABEL: too_many_args_use_workitem_id_x_stack_yz:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_and_b32_e32 v32, 0x3ff, v31
+; GFX7-NEXT: flat_store_dword v[0:1], v32
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_bfe_u32 v32, v31, 10, 10
+; GFX7-NEXT: v_bfe_u32 v31, v31, 20, 10
+; GFX7-NEXT: flat_store_dword v[0:1], v32
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v31
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v1
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v2
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v3
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v4
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v5
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v6
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v7
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v8
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v9
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v10
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v11
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v12
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v13
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v14
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v15
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v16
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v17
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v18
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v19
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v20
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v21
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v22
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v23
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v24
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v25
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v26
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v27
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v28
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v29
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: flat_store_dword v[0:1], v30
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: too_many_args_use_workitem_id_x_stack_yz:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_and_b32_e32 v32, 0x3ff, v31
+; GFX90A-NEXT: global_store_dword v[0:1], v32, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: v_bfe_u32 v32, v31, 10, 10
+; GFX90A-NEXT: v_bfe_u32 v31, v31, 20, 10
+; GFX90A-NEXT: global_store_dword v[0:1], v32, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v31, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v0, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v1, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v2, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v3, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v4, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v5, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v6, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v7, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v8, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v9, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v10, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v11, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v12, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v13, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v14, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v15, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v16, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v17, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v18, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v19, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v20, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v21, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v22, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v23, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v24, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v25, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v26, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v27, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v28, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v29, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dword v[0:1], v30, off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
@@ -731,21 +1992,101 @@ define void @too_many_args_use_workitem_id_x_stack_yz(
ret void
}
+; GCN: ScratchSize: 0
-; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_stack_yz:
-
-; GCN-NOT: v0
-; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v1, 10, v1
-; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, v1
-; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v2, 20, v2
-; UNPACKED-TID-DAG: v_or_b32_e32 v31, v0, v2
-; PACKED-TID: v_mov_b32_e32 v31, v0
-
-; GCN: s_mov_b32 s32, 0
-; GCN: s_swappc_b64
-
-; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 {
+; GFX7-LABEL: kern_call_too_many_args_use_workitem_id_x_stack_yz:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_add_u32 s0, s0, s5
+; GFX7-NEXT: s_addc_u32 s1, s1, 0
+; GFX7-NEXT: s_getpc_b64 s[4:5]
+; GFX7-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x_stack_yz@gotpcrel32@lo+4
+; GFX7-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x_stack_yz@gotpcrel32@hi+12
+; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX7-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7-NEXT: v_mov_b32_e32 v0, 10
+; GFX7-NEXT: v_mov_b32_e32 v1, 20
+; GFX7-NEXT: v_mov_b32_e32 v2, 30
+; GFX7-NEXT: v_mov_b32_e32 v3, 40
+; GFX7-NEXT: v_mov_b32_e32 v4, 50
+; GFX7-NEXT: v_mov_b32_e32 v5, 60
+; GFX7-NEXT: v_mov_b32_e32 v6, 0x46
+; GFX7-NEXT: v_mov_b32_e32 v7, 0x50
+; GFX7-NEXT: v_mov_b32_e32 v8, 0x5a
+; GFX7-NEXT: v_mov_b32_e32 v9, 0x64
+; GFX7-NEXT: v_mov_b32_e32 v10, 0x6e
+; GFX7-NEXT: v_mov_b32_e32 v11, 0x78
+; GFX7-NEXT: v_mov_b32_e32 v12, 0x82
+; GFX7-NEXT: v_mov_b32_e32 v13, 0x8c
+; GFX7-NEXT: v_mov_b32_e32 v14, 0x96
+; GFX7-NEXT: v_mov_b32_e32 v15, 0xa0
+; GFX7-NEXT: v_mov_b32_e32 v16, 0xaa
+; GFX7-NEXT: v_mov_b32_e32 v17, 0xb4
+; GFX7-NEXT: v_mov_b32_e32 v18, 0xbe
+; GFX7-NEXT: v_mov_b32_e32 v19, 0xc8
+; GFX7-NEXT: v_mov_b32_e32 v20, 0xd2
+; GFX7-NEXT: v_mov_b32_e32 v21, 0xdc
+; GFX7-NEXT: v_mov_b32_e32 v22, 0xe6
+; GFX7-NEXT: v_mov_b32_e32 v23, 0xf0
+; GFX7-NEXT: v_mov_b32_e32 v24, 0xfa
+; GFX7-NEXT: v_mov_b32_e32 v25, 0x104
+; GFX7-NEXT: v_mov_b32_e32 v26, 0x10e
+; GFX7-NEXT: v_mov_b32_e32 v27, 0x118
+; GFX7-NEXT: v_mov_b32_e32 v28, 0x122
+; GFX7-NEXT: v_mov_b32_e32 v29, 0x12c
+; GFX7-NEXT: v_mov_b32_e32 v30, 0x136
+; GFX7-NEXT: s_mov_b32 s32, 0
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT: s_endpgm
+;
+; GFX90A-LABEL: kern_call_too_many_args_use_workitem_id_x_stack_yz:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_add_u32 s0, s0, s5
+; GFX90A-NEXT: s_addc_u32 s1, s1, 0
+; GFX90A-NEXT: s_getpc_b64 s[4:5]
+; GFX90A-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x_stack_yz@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x_stack_yz@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT: v_mov_b32_e32 v31, v0
+; GFX90A-NEXT: v_mov_b32_e32 v0, 10
+; GFX90A-NEXT: v_mov_b32_e32 v1, 20
+; GFX90A-NEXT: v_mov_b32_e32 v2, 30
+; GFX90A-NEXT: v_mov_b32_e32 v3, 40
+; GFX90A-NEXT: v_mov_b32_e32 v4, 50
+; GFX90A-NEXT: v_mov_b32_e32 v5, 60
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0x46
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0x50
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0x5a
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0x64
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0x6e
+; GFX90A-NEXT: v_mov_b32_e32 v11, 0x78
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0x82
+; GFX90A-NEXT: v_mov_b32_e32 v13, 0x8c
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0x96
+; GFX90A-NEXT: v_mov_b32_e32 v15, 0xa0
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0xaa
+; GFX90A-NEXT: v_mov_b32_e32 v17, 0xb4
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0xbe
+; GFX90A-NEXT: v_mov_b32_e32 v19, 0xc8
+; GFX90A-NEXT: v_mov_b32_e32 v20, 0xd2
+; GFX90A-NEXT: v_mov_b32_e32 v21, 0xdc
+; GFX90A-NEXT: v_mov_b32_e32 v22, 0xe6
+; GFX90A-NEXT: v_mov_b32_e32 v23, 0xf0
+; GFX90A-NEXT: v_mov_b32_e32 v24, 0xfa
+; GFX90A-NEXT: v_mov_b32_e32 v25, 0x104
+; GFX90A-NEXT: v_mov_b32_e32 v26, 0x10e
+; GFX90A-NEXT: v_mov_b32_e32 v27, 0x118
+; GFX90A-NEXT: v_mov_b32_e32 v28, 0x122
+; GFX90A-NEXT: v_mov_b32_e32 v29, 0x12c
+; GFX90A-NEXT: v_mov_b32_e32 v30, 0x136
+; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT: s_endpgm
call void @too_many_args_use_workitem_id_x_stack_yz(
i32 10, i32 20, i32 30, i32 40,
i32 50, i32 60, i32 70, i32 80,
@@ -757,6 +2098,7 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz()
i32 290, i32 300, i32 310)
ret void
}
+; GCN: .amdhsa_system_vgpr_workitem_id 2
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
index b671d68a4b75b..bb2f06bfe83f8 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
@@ -1,53 +1,58 @@
-; RUN: opt -mcpu=kaveri -passes=amdgpu-attributor < %s | llc -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,FIXEDABI %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -mcpu=kaveri -passes=amdgpu-attributor < %s | llc -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN %s
target triple = "amdgcn-amd-amdhsa"
-; GCN-LABEL: {{^}}use_workitem_id_x:
-; GCN: s_waitcnt
-; FIXEDABI: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v31
-; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_x() #1 {
+; GCN-LABEL: use_workitem_id_x:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
%val = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %val, ptr addrspace(1) poison
ret void
}
-; GCN-LABEL: {{^}}use_workitem_id_y:
-; GCN: s_waitcnt
-; FIXEDABI: v_bfe_u32 [[ID:v[0-9]+]], v31, 10, 10
-; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_y() #1 {
+; GCN-LABEL: use_workitem_id_y:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_bfe_u32 v0, v31, 10, 10
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
%val = call i32 @llvm.amdgcn.workitem.id.y()
store volatile i32 %val, ptr addrspace(1) poison
ret void
}
-; GCN-LABEL: {{^}}use_workitem_id_z:
-; GCN: s_waitcnt
-; FIXEDABI: v_bfe_u32 [[ID:v[0-9]+]], v31, 20, 10
-; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_z() #1 {
+; GCN-LABEL: use_workitem_id_z:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_bfe_u32 v0, v31, 20, 10
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
%val = call i32 @llvm.amdgcn.workitem.id.z()
store volatile i32 %val, ptr addrspace(1) poison
ret void
}
-; GCN-LABEL: {{^}}use_workitem_id_xy:
-; GCN: s_waitcnt
-; FIXEDABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
-; FIXEDABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
-
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xy() #1 {
+; GCN-LABEL: use_workitem_id_xy:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_bfe_u32 v0, v31, 10, 10
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workitem.id.y()
store volatile i32 %val0, ptr addrspace(1) poison
@@ -55,20 +60,20 @@ define void @use_workitem_id_xy() #1 {
ret void
}
-; GCN-LABEL: {{^}}use_workitem_id_xyz:
-; GCN: s_waitcnt
-
-; FIXEDABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
-; FIXEDABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
-; FIXEDABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
-
-
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xyz() #1 {
+; GCN-LABEL: use_workitem_id_xyz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_bfe_u32 v0, v31, 10, 10
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_bfe_u32 v0, v31, 20, 10
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workitem.id.y()
%val2 = call i32 @llvm.amdgcn.workitem.id.z()
@@ -78,16 +83,17 @@ define void @use_workitem_id_xyz() #1 {
ret void
}
-; GCN-LABEL: {{^}}use_workitem_id_xz:
-; GCN: s_waitcnt
-; FIXEDABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
-; FIXEDABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
-
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xz() #1 {
+; GCN-LABEL: use_workitem_id_xz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_bfe_u32 v0, v31, 20, 10
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workitem.id.z()
store volatile i32 %val0, ptr addrspace(1) poison
@@ -95,16 +101,17 @@ define void @use_workitem_id_xz() #1 {
ret void
}
-; GCN-LABEL: {{^}}use_workitem_id_yz:
-; GCN: s_waitcnt
-; FIXEDABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
-; FIXEDABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
-
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_yz() #1 {
+; GCN-LABEL: use_workitem_id_yz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_bfe_u32 v0, v31, 10, 10
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_bfe_u32 v0, v31, 20, 10
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
%val0 = call i32 @llvm.amdgcn.workitem.id.y()
%val1 = call i32 @llvm.amdgcn.workitem.id.z()
store volatile i32 %val0, ptr addrspace(1) poison
@@ -112,229 +119,423 @@ define void @use_workitem_id_yz() #1 {
ret void
}
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_x:
-
-; FIXEDABI-NOT: v0
-; FIXEDABI-NOT: v31
-; FIXEDABI: v_mov_b32_e32 v31, v0{{$}}
-; FIXEDABI-NOT: v0
-; FIXEDABI-NOT: v31
-
-; GCN: s_swappc_b64
-
-; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
+; GCN-LABEL: kern_indirect_use_workitem_id_x:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_add_u32 s0, s0, s5
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_x@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_x@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_mov_b32_e32 v31, v0
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_endpgm
call void @use_workitem_id_x()
ret void
}
+; GCN: .amdhsa_system_vgpr_workitem_id 0
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y:
-
-; FIXEDABI-NOT: v0
-; FIXEDABI-NOT: v1
-; FIXEDABI-NOT: v2
-; FIXEDABI: v_lshlrev_b32_e32 v31, 10, v1
-; FIXEDABI-NOT: v0
-; FIXEDABI-NOT: v1
-; FIXEDABI-NOT: v2
-
-; GCN: s_swappc_b64
-; GCN: .amdhsa_system_vgpr_workitem_id 1
define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
+; GCN-LABEL: kern_indirect_use_workitem_id_y:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_add_u32 s0, s0, s5
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_y@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_y@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_lshlrev_b32_e32 v31, 10, v1
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_endpgm
call void @use_workitem_id_y()
ret void
}
+; GCN: .amdhsa_system_vgpr_workitem_id 1
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z:
-
-; FIXEDABI-NOT: v0
-; FIXEDABI-NOT: v1
-; FIXEDABI: v_lshlrev_b32_e32 v31, 20, v2
-; FIXEDABI-NOT: v0
-; FIXEDABI-NOT: v1
-
-; GCN: s_swappc_b64
-
-; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
+; GCN-LABEL: kern_indirect_use_workitem_id_z:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_add_u32 s0, s0, s5
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_z@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_z@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_lshlrev_b32_e32 v31, 20, v2
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_endpgm
call void @use_workitem_id_z()
ret void
}
+; GCN: .amdhsa_system_vgpr_workitem_id 2
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xy:
-; FIXEDABI-NOT: v0
-; FIXEDABI-NOT: v1
-; FIXEDABI-NOT: v2
-; FIXEDABI: v_lshlrev_b32_e32 v1, 10, v1
-; FIXEDABI-NEXT: v_or_b32_e32 v31, v0, v1
-; FIXEDABI-NOT: v0
-; FIXEDABI-NOT: v1
-; FIXEDABI-NOT: v2
-
-; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
+; GCN-LABEL: kern_indirect_use_workitem_id_xy:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_add_u32 s0, s0, s5
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_xy@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_xy@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GCN-NEXT: v_or_b32_e32 v31, v0, v1
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_endpgm
call void @use_workitem_id_xy()
ret void
}
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xz:
-; FIXEDABI-NOT: v0
-; FIXEDABI-NOT: v1
-; FIXEDABI-NOT: v2
-; FIXEDABI: v_lshlrev_b32_e32 v1, 20, v2
-; FIXEDABI-NEXT: v_or_b32_e32 v31, v0, v1
-; FIXEDABI-NOT: v0
-; FIXEDABI-NOT: v1
-; FIXEDABI-NOT: v2
-
-; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
+; GCN-LABEL: kern_indirect_use_workitem_id_xz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_add_u32 s0, s0, s5
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_xz@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_xz@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 20, v2
+; GCN-NEXT: v_or_b32_e32 v31, v0, v1
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_endpgm
call void @use_workitem_id_xz()
ret void
}
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_yz:
-; FIXEDABI-NOT: v0
-; FIXEDABI-NOT: v1
-; FIXEDABI-NOT: v2
-; FIXEDABI:v_lshlrev_b32_e32 v0, 20, v2
-; FIXEDABI-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; FIXEDABI-NEXT: v_or_b32_e32 v31, v1, v0
-; FIXEDABI-NOT: v0
-; FIXEDABI-NOT: v1
-; FIXEDABI-NOT: v2
-
-; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 {
+; GCN-LABEL: kern_indirect_use_workitem_id_yz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_add_u32 s0, s0, s5
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_yz@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_yz@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 20, v2
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GCN-NEXT: v_or_b32_e32 v31, v1, v0
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_endpgm
call void @use_workitem_id_yz()
ret void
}
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xyz:
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
-
-; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_xyz() #1 {
+; GCN-LABEL: kern_indirect_use_workitem_id_xyz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_add_u32 s0, s0, s5
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_xyz@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_xyz@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GCN-NEXT: v_or_b32_e32 v0, v0, v1
+; GCN-NEXT: v_or_b32_e32 v31, v0, v2
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_endpgm
call void @use_workitem_id_xyz()
ret void
}
-; GCN-LABEL: {{^}}func_indirect_use_workitem_id_x:
-; GCN-NOT: v0
-; GCN: s_swappc_b64
-; GCN-NOT: v0
define void @func_indirect_use_workitem_id_x() #1 {
+; GCN-LABEL: func_indirect_use_workitem_id_x:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s33
+; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_addk_i32 s32, 0x400
+; GCN-NEXT: v_writelane_b32 v40, s4, 2
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_x@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_x@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_writelane_b32 v40, s30, 0
+; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
+; GCN-NEXT: s_mov_b32 s32, s33
+; GCN-NEXT: v_readlane_b32 s4, v40, 2
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b32 s33, s4
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
call void @use_workitem_id_x()
ret void
}
-; GCN-LABEL: {{^}}func_indirect_use_workitem_id_y:
-; GCN-NOT: v0
-; GCN: s_swappc_b64
-; GCN-NOT: v0
define void @func_indirect_use_workitem_id_y() #1 {
+; GCN-LABEL: func_indirect_use_workitem_id_y:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s33
+; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_addk_i32 s32, 0x400
+; GCN-NEXT: v_writelane_b32 v40, s4, 2
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_y@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_y@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_writelane_b32 v40, s30, 0
+; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
+; GCN-NEXT: s_mov_b32 s32, s33
+; GCN-NEXT: v_readlane_b32 s4, v40, 2
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b32 s33, s4
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
call void @use_workitem_id_y()
ret void
}
-; GCN-LABEL: {{^}}func_indirect_use_workitem_id_z:
-; GCN-NOT: v0
-; GCN: s_swappc_b64
-; GCN-NOT: v0
define void @func_indirect_use_workitem_id_z() #1 {
+; GCN-LABEL: func_indirect_use_workitem_id_z:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s33
+; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_addk_i32 s32, 0x400
+; GCN-NEXT: v_writelane_b32 v40, s4, 2
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_z@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_z@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_writelane_b32 v40, s30, 0
+; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
+; GCN-NEXT: s_mov_b32 s32, s33
+; GCN-NEXT: v_readlane_b32 s4, v40, 2
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b32 s33, s4
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
call void @use_workitem_id_z()
ret void
}
-; GCN-LABEL: {{^}}other_arg_use_workitem_id_x:
-; GCN: s_waitcnt
-; FIXEDABI-DAG: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v31
-
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
define void @other_arg_use_workitem_id_x(i32 %arg0) #1 {
+; GCN-LABEL: other_arg_use_workitem_id_x:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
%val = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %arg0, ptr addrspace(1) poison
store volatile i32 %val, ptr addrspace(1) poison
ret void
}
-; GCN-LABEL: {{^}}other_arg_use_workitem_id_y:
-; GCN: s_waitcnt
-; FIXEDABI-DAG: v_bfe_u32 [[ID:v[0-9]+]], v31, 10, 10
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
define void @other_arg_use_workitem_id_y(i32 %arg0) #1 {
+; GCN-LABEL: other_arg_use_workitem_id_y:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_bfe_u32 v0, v31, 10, 10
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
%val = call i32 @llvm.amdgcn.workitem.id.y()
store volatile i32 %arg0, ptr addrspace(1) poison
store volatile i32 %val, ptr addrspace(1) poison
ret void
}
-; GCN-LABEL: {{^}}other_arg_use_workitem_id_z:
-; GCN: s_waitcnt
-; FIXEDABI-DAG: v_bfe_u32 [[ID:v[0-9]+]], v31, 20, 10
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
+; GCN-LABEL: other_arg_use_workitem_id_z:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_bfe_u32 v0, v31, 20, 10
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
%val = call i32 @llvm.amdgcn.workitem.id.z()
store volatile i32 %arg0, ptr addrspace(1) poison
store volatile i32 %val, ptr addrspace(1) poison
ret void
}
-
-; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_x:
-
-; FIXEDABI-NOT: v0
-; FIXEDABI: v_mov_b32_e32 v31, v0
-; FIXEDABI: v_mov_b32_e32 v0, 0x22b
-
-; GCN: s_swappc_b64
-
-; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
+; GCN-LABEL: kern_indirect_other_arg_use_workitem_id_x:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_add_u32 s0, s0, s5
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, other_arg_use_workitem_id_x@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, other_arg_use_workitem_id_x@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_mov_b32_e32 v31, v0
+; GCN-NEXT: v_mov_b32_e32 v0, 0x22b
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_endpgm
call void @other_arg_use_workitem_id_x(i32 555)
ret void
}
+; GCN: .amdhsa_system_vgpr_workitem_id 0
-
-; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y:
-
-; FIXEDABI-NOT: v0
-; FIXEDABI-NOT: v1
-; FIXEDABI-NOT: v2
-; FIXEDABI: v_lshlrev_b32_e32 v31, 10, v1
-; FIXEDABI: v_mov_b32_e32 v0, 0x22b
-
-; GCN: .amdhsa_system_vgpr_workitem_id 1
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
+; GCN-LABEL: kern_indirect_other_arg_use_workitem_id_y:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_add_u32 s0, s0, s5
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, other_arg_use_workitem_id_y@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, other_arg_use_workitem_id_y@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_lshlrev_b32_e32 v31, 10, v1
+; GCN-NEXT: v_mov_b32_e32 v0, 0x22b
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_endpgm
call void @other_arg_use_workitem_id_y(i32 555)
ret void
}
+; GCN: .amdhsa_system_vgpr_workitem_id 1
-; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z:
-
-; FIXEDABI-NOT: v0
-; FIXEDABI-NOT: v1
-; FIXEDABI-NOT: v2
-; FIXEDABI: v_lshlrev_b32_e32 v31, 20, v2
-; FIXEDABI: v_mov_b32_e32 v0, 0x22b
-
-; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
+; GCN-LABEL: kern_indirect_other_arg_use_workitem_id_z:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_add_u32 s0, s0, s5
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, other_arg_use_workitem_id_z@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, other_arg_use_workitem_id_z@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_lshlrev_b32_e32 v31, 20, v2
+; GCN-NEXT: v_mov_b32_e32 v0, 0x22b
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_endpgm
call void @other_arg_use_workitem_id_z(i32 555)
ret void
}
+; GCN: .amdhsa_system_vgpr_workitem_id 2
-; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x:
-; FIXEDABI: v_and_b32_e32 v31, 0x3ff, v31
-; FIXEDABI: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32{{$}}
define void @too_many_args_use_workitem_id_x(
+; GCN-LABEL: too_many_args_use_workitem_id_x:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v31, 0x3ff, v31
+; GCN-NEXT: flat_store_dword v[0:1], v31
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v2
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v3
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v4
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v5
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v6
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v7
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v8
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v9
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v10
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v11
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v12
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v13
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v14
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v15
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v16
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v17
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v18
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v19
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v20
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v21
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v22
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v23
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v24
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v25
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v26
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v27
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v28
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v29
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v30
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v31
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
@@ -381,20 +582,53 @@ define void @too_many_args_use_workitem_id_x(
ret void
}
-; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x:
-
-; FIXEDABI-NOT: v0
-; FIXEDABI-NOT: v1
-; FIXEDABI-NOT: v2
-; FIXEDABI-DAG: s_mov_b32 s32, 0
-; FIXEDABI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x140{{$}}
-; FIXEDABI-DAG: buffer_store_dword [[K]], off, s[0:3], s32{{$}}
-; FIXEDABI-DAG: v_mov_b32_e32 v31, v0
-
-; FIXEDABI: s_swappc_b64
-
-; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
+; GCN-LABEL: kern_call_too_many_args_use_workitem_id_x:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_add_u32 s0, s0, s5
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: v_mov_b32_e32 v1, 0x140
+; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32
+; GCN-NEXT: v_mov_b32_e32 v31, v0
+; GCN-NEXT: v_mov_b32_e32 v0, 10
+; GCN-NEXT: v_mov_b32_e32 v1, 20
+; GCN-NEXT: v_mov_b32_e32 v2, 30
+; GCN-NEXT: v_mov_b32_e32 v3, 40
+; GCN-NEXT: v_mov_b32_e32 v4, 50
+; GCN-NEXT: v_mov_b32_e32 v5, 60
+; GCN-NEXT: v_mov_b32_e32 v6, 0x46
+; GCN-NEXT: v_mov_b32_e32 v7, 0x50
+; GCN-NEXT: v_mov_b32_e32 v8, 0x5a
+; GCN-NEXT: v_mov_b32_e32 v9, 0x64
+; GCN-NEXT: v_mov_b32_e32 v10, 0x6e
+; GCN-NEXT: v_mov_b32_e32 v11, 0x78
+; GCN-NEXT: v_mov_b32_e32 v12, 0x82
+; GCN-NEXT: v_mov_b32_e32 v13, 0x8c
+; GCN-NEXT: v_mov_b32_e32 v14, 0x96
+; GCN-NEXT: v_mov_b32_e32 v15, 0xa0
+; GCN-NEXT: v_mov_b32_e32 v16, 0xaa
+; GCN-NEXT: v_mov_b32_e32 v17, 0xb4
+; GCN-NEXT: v_mov_b32_e32 v18, 0xbe
+; GCN-NEXT: v_mov_b32_e32 v19, 0xc8
+; GCN-NEXT: v_mov_b32_e32 v20, 0xd2
+; GCN-NEXT: v_mov_b32_e32 v21, 0xdc
+; GCN-NEXT: v_mov_b32_e32 v22, 0xe6
+; GCN-NEXT: v_mov_b32_e32 v23, 0xf0
+; GCN-NEXT: v_mov_b32_e32 v24, 0xfa
+; GCN-NEXT: v_mov_b32_e32 v25, 0x104
+; GCN-NEXT: v_mov_b32_e32 v26, 0x10e
+; GCN-NEXT: v_mov_b32_e32 v27, 0x118
+; GCN-NEXT: v_mov_b32_e32 v28, 0x122
+; GCN-NEXT: v_mov_b32_e32 v29, 0x12c
+; GCN-NEXT: v_mov_b32_e32 v30, 0x136
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_endpgm
call void @too_many_args_use_workitem_id_x(
i32 10, i32 20, i32 30, i32 40,
i32 50, i32 60, i32 70, i32 80,
@@ -406,18 +640,72 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
i32 290, i32 300, i32 310, i32 320)
ret void
}
+; GCN: .amdhsa_system_vgpr_workitem_id 0
-; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x:
-
-; Touching the workitem id register is not necessary.
-; FIXEDABI-NOT: v31
-; FIXEDABI: v_mov_b32_e32 [[K:v[0-9]+]], 0x140{{$}}
-; FIXEDABI-NOT: v31
-; FIXEDABI: buffer_store_dword [[K]], off, s[0:3], s32{{$}}
-; FIXEDABI-NOT: v31
-
-; GCN: s_swappc_b64
define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
+; GCN-LABEL: func_call_too_many_args_use_workitem_id_x:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s33
+; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_addk_i32 s32, 0x400
+; GCN-NEXT: v_writelane_b32 v40, s4, 2
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, 0x140
+; GCN-NEXT: v_writelane_b32 v40, s30, 0
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32
+; GCN-NEXT: v_mov_b32_e32 v0, 10
+; GCN-NEXT: v_mov_b32_e32 v1, 20
+; GCN-NEXT: v_mov_b32_e32 v2, 30
+; GCN-NEXT: v_mov_b32_e32 v3, 40
+; GCN-NEXT: v_mov_b32_e32 v4, 50
+; GCN-NEXT: v_mov_b32_e32 v5, 60
+; GCN-NEXT: v_mov_b32_e32 v6, 0x46
+; GCN-NEXT: v_mov_b32_e32 v7, 0x50
+; GCN-NEXT: v_mov_b32_e32 v8, 0x5a
+; GCN-NEXT: v_mov_b32_e32 v9, 0x64
+; GCN-NEXT: v_mov_b32_e32 v10, 0x6e
+; GCN-NEXT: v_mov_b32_e32 v11, 0x78
+; GCN-NEXT: v_mov_b32_e32 v12, 0x82
+; GCN-NEXT: v_mov_b32_e32 v13, 0x8c
+; GCN-NEXT: v_mov_b32_e32 v14, 0x96
+; GCN-NEXT: v_mov_b32_e32 v15, 0xa0
+; GCN-NEXT: v_mov_b32_e32 v16, 0xaa
+; GCN-NEXT: v_mov_b32_e32 v17, 0xb4
+; GCN-NEXT: v_mov_b32_e32 v18, 0xbe
+; GCN-NEXT: v_mov_b32_e32 v19, 0xc8
+; GCN-NEXT: v_mov_b32_e32 v20, 0xd2
+; GCN-NEXT: v_mov_b32_e32 v21, 0xdc
+; GCN-NEXT: v_mov_b32_e32 v22, 0xe6
+; GCN-NEXT: v_mov_b32_e32 v23, 0xf0
+; GCN-NEXT: v_mov_b32_e32 v24, 0xfa
+; GCN-NEXT: v_mov_b32_e32 v25, 0x104
+; GCN-NEXT: v_mov_b32_e32 v26, 0x10e
+; GCN-NEXT: v_mov_b32_e32 v27, 0x118
+; GCN-NEXT: v_mov_b32_e32 v28, 0x122
+; GCN-NEXT: v_mov_b32_e32 v29, 0x12c
+; GCN-NEXT: v_mov_b32_e32 v30, 0x136
+; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
+; GCN-NEXT: s_mov_b32 s32, s33
+; GCN-NEXT: v_readlane_b32 s4, v40, 2
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b32 s33, s4
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
store volatile i32 %arg0, ptr addrspace(1) poison
call void @too_many_args_use_workitem_id_x(
i32 10, i32 20, i32 30, i32 40,
@@ -432,19 +720,38 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
}
; Requires loading and storing to stack slot.
-; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
-; GCN-DAG: s_addk_i32 s32, 0x400{{$}}
-; GCN-DAG: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; GCN-DAG: buffer_load_dword v32, off, s[0:3], s33{{$}}
-
-; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}}
-
-; GCN: s_swappc_b64
-
-; GCN: s_mov_b32 s32, s33
-; GCN: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GCN: s_setpc_b64
define void @too_many_args_call_too_many_args_use_workitem_id_x(
+; GCN-LABEL: too_many_args_call_too_many_args_use_workitem_id_x:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s33
+; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s33
+; GCN-NEXT: s_addk_i32 s32, 0x400
+; GCN-NEXT: v_writelane_b32 v40, s4, 2
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_writelane_b32 v40, s30, 0
+; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
+; GCN-NEXT: s_mov_b32 s32, s33
+; GCN-NEXT: v_readlane_b32 s4, v40, 2
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b32 s33, s4
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
@@ -461,17 +768,81 @@ define void @too_many_args_call_too_many_args_use_workitem_id_x(
; frame[0] = byval arg32
; frame[1] = stack passed workitem ID x
; frame[2] = VGPR spill slot
-
-; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_byval:
-
-; FIXEDABI: v_and_b32_e32 v31, 0x3ff, v31
-; FIXEDABI-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v31
-
-; FIXEDABI: buffer_load_dword v31, off, s[0:3], s32{{$}}
-; FIXEDABI: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
-; FIXEDABI: buffer_load_dword v0, off, s[0:3], s32 offset:4 glc{{$}}
-; FIXEDABI: s_setpc_b64
define void @too_many_args_use_workitem_id_x_byval(
+; GCN-LABEL: too_many_args_use_workitem_id_x_byval:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v31, 0x3ff, v31
+; GCN-NEXT: flat_store_dword v[0:1], v31
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v2
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v3
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v4
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v5
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v6
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v7
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v8
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v9
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v10
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v11
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v12
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v13
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v14
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v15
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v16
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v17
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v18
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v19
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v20
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v21
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v22
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v23
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v24
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v25
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v26
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v27
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v28
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v29
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v30
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v31
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 glc
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
@@ -522,27 +893,59 @@ define void @too_many_args_use_workitem_id_x_byval(
; sp[0] = byval
; sp[1] = ??
; sp[2] = stack passed workitem ID x
-
-; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
-
-; FIXEDABI-NOT: v0
-; FIXEDABI-NOT: v1
-; FIXEDABI-NOT: v2
-; FIXEDABI: v_mov_b32_e32 v31, v0
-; FIXEDABI: v_mov_b32_e32 [[K0:v[0-9]+]], 0x3e7
-; FIXEDABI: s_movk_i32 s32, 0x400{{$}}
-; FIXEDABI: buffer_store_dword [[K0]], off, s[0:3], 0{{$}}
-; FIXEDABI: v_mov_b32_e32 [[K1:v[0-9]+]], 0x140
-
-; FIXEDABI: buffer_store_dword [[K1]], off, s[0:3], s32{{$}}
-
-; FIXME: Why this reload?
-; FIXEDABI: buffer_load_dword [[RELOAD:v[0-9]+]], off, s[0:3], 0{{$}}
-
-; FIXEDABI-NOT: s32
-; FIXEDABI: buffer_store_dword [[RELOAD]], off, s[0:3], s32
-; FIXEDABI: s_swappc_b64
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 {
+; GCN-LABEL: kern_call_too_many_args_use_workitem_id_x_byval:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_add_u32 s0, s0, s5
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: v_mov_b32_e32 v31, v0
+; GCN-NEXT: v_mov_b32_e32 v0, 0x3e7
+; GCN-NEXT: s_movk_i32 s32, 0x400
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, 0x140
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32
+; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x_byval@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x_byval@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_mov_b32_e32 v1, 20
+; GCN-NEXT: v_mov_b32_e32 v2, 30
+; GCN-NEXT: v_mov_b32_e32 v3, 40
+; GCN-NEXT: v_mov_b32_e32 v4, 50
+; GCN-NEXT: v_mov_b32_e32 v5, 60
+; GCN-NEXT: v_mov_b32_e32 v6, 0x46
+; GCN-NEXT: v_mov_b32_e32 v7, 0x50
+; GCN-NEXT: v_mov_b32_e32 v8, 0x5a
+; GCN-NEXT: v_mov_b32_e32 v9, 0x64
+; GCN-NEXT: v_mov_b32_e32 v10, 0x6e
+; GCN-NEXT: v_mov_b32_e32 v11, 0x78
+; GCN-NEXT: v_mov_b32_e32 v12, 0x82
+; GCN-NEXT: v_mov_b32_e32 v13, 0x8c
+; GCN-NEXT: v_mov_b32_e32 v14, 0x96
+; GCN-NEXT: v_mov_b32_e32 v15, 0xa0
+; GCN-NEXT: v_mov_b32_e32 v16, 0xaa
+; GCN-NEXT: v_mov_b32_e32 v17, 0xb4
+; GCN-NEXT: v_mov_b32_e32 v18, 0xbe
+; GCN-NEXT: v_mov_b32_e32 v19, 0xc8
+; GCN-NEXT: v_mov_b32_e32 v20, 0xd2
+; GCN-NEXT: v_mov_b32_e32 v21, 0xdc
+; GCN-NEXT: v_mov_b32_e32 v22, 0xe6
+; GCN-NEXT: v_mov_b32_e32 v23, 0xf0
+; GCN-NEXT: v_mov_b32_e32 v24, 0xfa
+; GCN-NEXT: v_mov_b32_e32 v25, 0x104
+; GCN-NEXT: v_mov_b32_e32 v26, 0x10e
+; GCN-NEXT: v_mov_b32_e32 v27, 0x118
+; GCN-NEXT: v_mov_b32_e32 v28, 0x122
+; GCN-NEXT: v_mov_b32_e32 v29, 0x12c
+; GCN-NEXT: v_mov_b32_e32 v30, 0x136
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
+; GCN-NEXT: v_mov_b32_e32 v0, 10
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_endpgm
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 999, ptr addrspace(5) %alloca
call void @too_many_args_use_workitem_id_x_byval(
@@ -558,19 +961,74 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1
ret void
}
-; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
-; FIXED-ABI-NOT: v31
-; FIXEDABI: v_mov_b32_e32 [[K0:v[0-9]+]], 0x3e7{{$}}
-; FIXEDABI: buffer_store_dword [[K0]], off, s[0:3], s33{{$}}
-; FIXEDABI: v_mov_b32_e32 [[K1:v[0-9]+]], 0x140{{$}}
-; FIXEDABI: buffer_store_dword [[K1]], off, s[0:3], s32{{$}}
-; FIXEDABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}}
-
-; FIXED-ABI-NOT: v31
-; FIXEDABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
-; FIXED-ABI-NOT: v31
-; FIXEDABI: s_swappc_b64
define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
+; GCN-LABEL: func_call_too_many_args_use_workitem_id_x_byval:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s33
+; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: v_mov_b32_e32 v0, 0x3e7
+; GCN-NEXT: s_addk_i32 s32, 0x400
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, 0x140
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32
+; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33
+; GCN-NEXT: v_writelane_b32 v40, s4, 2
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x_byval@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x_byval@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_writelane_b32 v40, s30, 0
+; GCN-NEXT: v_mov_b32_e32 v1, 20
+; GCN-NEXT: v_mov_b32_e32 v2, 30
+; GCN-NEXT: v_mov_b32_e32 v3, 40
+; GCN-NEXT: v_mov_b32_e32 v4, 50
+; GCN-NEXT: v_mov_b32_e32 v5, 60
+; GCN-NEXT: v_mov_b32_e32 v6, 0x46
+; GCN-NEXT: v_mov_b32_e32 v7, 0x50
+; GCN-NEXT: v_mov_b32_e32 v8, 0x5a
+; GCN-NEXT: v_mov_b32_e32 v9, 0x64
+; GCN-NEXT: v_mov_b32_e32 v10, 0x6e
+; GCN-NEXT: v_mov_b32_e32 v11, 0x78
+; GCN-NEXT: v_mov_b32_e32 v12, 0x82
+; GCN-NEXT: v_mov_b32_e32 v13, 0x8c
+; GCN-NEXT: v_mov_b32_e32 v14, 0x96
+; GCN-NEXT: v_mov_b32_e32 v15, 0xa0
+; GCN-NEXT: v_mov_b32_e32 v16, 0xaa
+; GCN-NEXT: v_mov_b32_e32 v17, 0xb4
+; GCN-NEXT: v_mov_b32_e32 v18, 0xbe
+; GCN-NEXT: v_mov_b32_e32 v19, 0xc8
+; GCN-NEXT: v_mov_b32_e32 v20, 0xd2
+; GCN-NEXT: v_mov_b32_e32 v21, 0xdc
+; GCN-NEXT: v_mov_b32_e32 v22, 0xe6
+; GCN-NEXT: v_mov_b32_e32 v23, 0xf0
+; GCN-NEXT: v_mov_b32_e32 v24, 0xfa
+; GCN-NEXT: v_mov_b32_e32 v25, 0x104
+; GCN-NEXT: v_mov_b32_e32 v26, 0x10e
+; GCN-NEXT: v_mov_b32_e32 v27, 0x118
+; GCN-NEXT: v_mov_b32_e32 v28, 0x122
+; GCN-NEXT: v_mov_b32_e32 v29, 0x12c
+; GCN-NEXT: v_mov_b32_e32 v30, 0x136
+; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
+; GCN-NEXT: v_mov_b32_e32 v0, 10
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
+; GCN-NEXT: s_mov_b32 s32, s33
+; GCN-NEXT: v_readlane_b32 s4, v40, 2
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b32 s33, s4
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 999, ptr addrspace(5) %alloca
call void @too_many_args_use_workitem_id_x_byval(
@@ -586,17 +1044,85 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
ret void
}
-; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz:
-; FIXEDABI: v_and_b32_e32 [[AND_X:v[0-9]+]], 0x3ff, v31
-; FIXEDABI-NOT: buffer_load_dword
-; FIXEDABI: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[AND_X]]
-; FIXEDABI-NOT: buffer_load_dword
-; FIXEDABI: v_bfe_u32 [[BFE_Y:v[0-9]+]], v31, 10, 10
-; FIXEDABI-NEXT: v_bfe_u32 [[BFE_Z:v[0-9]+]], v31, 20, 10
-; FIXEDABI-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[BFE_Y]]
-; FIXEDABI: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[BFE_Z]]
-
define void @too_many_args_use_workitem_id_xyz(
+; GCN-LABEL: too_many_args_use_workitem_id_xyz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v32, 0x3ff, v31
+; GCN-NEXT: flat_store_dword v[0:1], v32
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_bfe_u32 v32, v31, 10, 10
+; GCN-NEXT: v_bfe_u32 v31, v31, 20, 10
+; GCN-NEXT: flat_store_dword v[0:1], v32
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v31
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v2
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v3
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v4
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v5
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v6
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v7
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v8
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v9
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v10
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v11
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v12
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v13
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v14
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v15
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v16
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v17
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v18
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v19
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v20
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v21
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v22
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v23
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v24
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v25
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v26
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v27
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v28
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v29
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v30
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v31
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
@@ -647,22 +1173,56 @@ define void @too_many_args_use_workitem_id_xyz(
ret void
}
-; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_xyz:
-
-; GCN-DAG: s_mov_b32 s32, 0
-
-; GCN-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x140
-; GCN-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; GCN-DAG: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-
-; FIXEDABI: buffer_store_dword [[K]], off, s[0:3], s32{{$}}
-; FIXEDABI-DAG: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
-
-; GCN: s_swappc_b64
-
-; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
+; GCN-LABEL: kern_call_too_many_args_use_workitem_id_xyz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_add_u32 s0, s0, s5
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_xyz@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_xyz@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: v_mov_b32_e32 v3, 0x140
+; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GCN-NEXT: v_or_b32_e32 v0, v0, v1
+; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s32
+; GCN-NEXT: v_or_b32_e32 v31, v0, v2
+; GCN-NEXT: v_mov_b32_e32 v0, 10
+; GCN-NEXT: v_mov_b32_e32 v1, 20
+; GCN-NEXT: v_mov_b32_e32 v2, 30
+; GCN-NEXT: v_mov_b32_e32 v3, 40
+; GCN-NEXT: v_mov_b32_e32 v4, 50
+; GCN-NEXT: v_mov_b32_e32 v5, 60
+; GCN-NEXT: v_mov_b32_e32 v6, 0x46
+; GCN-NEXT: v_mov_b32_e32 v7, 0x50
+; GCN-NEXT: v_mov_b32_e32 v8, 0x5a
+; GCN-NEXT: v_mov_b32_e32 v9, 0x64
+; GCN-NEXT: v_mov_b32_e32 v10, 0x6e
+; GCN-NEXT: v_mov_b32_e32 v11, 0x78
+; GCN-NEXT: v_mov_b32_e32 v12, 0x82
+; GCN-NEXT: v_mov_b32_e32 v13, 0x8c
+; GCN-NEXT: v_mov_b32_e32 v14, 0x96
+; GCN-NEXT: v_mov_b32_e32 v15, 0xa0
+; GCN-NEXT: v_mov_b32_e32 v16, 0xaa
+; GCN-NEXT: v_mov_b32_e32 v17, 0xb4
+; GCN-NEXT: v_mov_b32_e32 v18, 0xbe
+; GCN-NEXT: v_mov_b32_e32 v19, 0xc8
+; GCN-NEXT: v_mov_b32_e32 v20, 0xd2
+; GCN-NEXT: v_mov_b32_e32 v21, 0xdc
+; GCN-NEXT: v_mov_b32_e32 v22, 0xe6
+; GCN-NEXT: v_mov_b32_e32 v23, 0xf0
+; GCN-NEXT: v_mov_b32_e32 v24, 0xfa
+; GCN-NEXT: v_mov_b32_e32 v25, 0x104
+; GCN-NEXT: v_mov_b32_e32 v26, 0x10e
+; GCN-NEXT: v_mov_b32_e32 v27, 0x118
+; GCN-NEXT: v_mov_b32_e32 v28, 0x122
+; GCN-NEXT: v_mov_b32_e32 v29, 0x12c
+; GCN-NEXT: v_mov_b32_e32 v30, 0x136
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_endpgm
call void @too_many_args_use_workitem_id_xyz(
i32 10, i32 20, i32 30, i32 40,
i32 50, i32 60, i32 70, i32 80,
@@ -674,21 +1234,87 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
i32 290, i32 300, i32 310, i32 320)
ret void
}
+; GCN: .amdhsa_system_vgpr_workitem_id 2
; Var abi: workitem ID X in register, yz on stack
; v31 = workitem ID X
; frame[0] = workitem { Z, Y, X }
-
-; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_stack_yz:
-; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
-; GCN-DAG: {{flat|global}}_store_dword v[0:1], [[IDX]]
-; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[IDY]]
-; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[IDZ]]
-; GCN: s_setpc_b64
-; GCN: ScratchSize: 0
define void @too_many_args_use_workitem_id_x_stack_yz(
+; GCN-LABEL: too_many_args_use_workitem_id_x_stack_yz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v32, 0x3ff, v31
+; GCN-NEXT: flat_store_dword v[0:1], v32
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_bfe_u32 v32, v31, 10, 10
+; GCN-NEXT: v_bfe_u32 v31, v31, 20, 10
+; GCN-NEXT: flat_store_dword v[0:1], v32
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v31
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v2
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v3
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v4
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v5
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v6
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v7
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v8
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v9
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v10
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v11
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v12
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v13
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v14
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v15
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v16
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v17
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v18
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v19
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v20
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v21
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v22
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v23
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v24
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v25
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v26
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v27
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v28
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v29
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: flat_store_dword v[0:1], v30
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
@@ -737,20 +1363,56 @@ define void @too_many_args_use_workitem_id_x_stack_yz(
ret void
}
+; GCN: ScratchSize: 0
-; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_stack_yz:
-
-; GCN-NOT: v0
-; GCN-DAG: v_lshlrev_b32_e32 v1, 10, v1
-; GCN-DAG: v_or_b32_e32 v0, v0, v1
-; GCN-DAG: v_lshlrev_b32_e32 v2, 20, v2
-; GCN-DAG: v_or_b32_e32 v31, v0, v2
-
-; GCN: s_mov_b32 s32, 0
-; GCN: s_swappc_b64
-
-; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 {
+; GCN-LABEL: kern_call_too_many_args_use_workitem_id_x_stack_yz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_add_u32 s0, s0, s5
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x_stack_yz@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x_stack_yz@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GCN-NEXT: v_or_b32_e32 v0, v0, v1
+; GCN-NEXT: v_or_b32_e32 v31, v0, v2
+; GCN-NEXT: v_mov_b32_e32 v0, 10
+; GCN-NEXT: v_mov_b32_e32 v1, 20
+; GCN-NEXT: v_mov_b32_e32 v2, 30
+; GCN-NEXT: v_mov_b32_e32 v3, 40
+; GCN-NEXT: v_mov_b32_e32 v4, 50
+; GCN-NEXT: v_mov_b32_e32 v5, 60
+; GCN-NEXT: v_mov_b32_e32 v6, 0x46
+; GCN-NEXT: v_mov_b32_e32 v7, 0x50
+; GCN-NEXT: v_mov_b32_e32 v8, 0x5a
+; GCN-NEXT: v_mov_b32_e32 v9, 0x64
+; GCN-NEXT: v_mov_b32_e32 v10, 0x6e
+; GCN-NEXT: v_mov_b32_e32 v11, 0x78
+; GCN-NEXT: v_mov_b32_e32 v12, 0x82
+; GCN-NEXT: v_mov_b32_e32 v13, 0x8c
+; GCN-NEXT: v_mov_b32_e32 v14, 0x96
+; GCN-NEXT: v_mov_b32_e32 v15, 0xa0
+; GCN-NEXT: v_mov_b32_e32 v16, 0xaa
+; GCN-NEXT: v_mov_b32_e32 v17, 0xb4
+; GCN-NEXT: v_mov_b32_e32 v18, 0xbe
+; GCN-NEXT: v_mov_b32_e32 v19, 0xc8
+; GCN-NEXT: v_mov_b32_e32 v20, 0xd2
+; GCN-NEXT: v_mov_b32_e32 v21, 0xdc
+; GCN-NEXT: v_mov_b32_e32 v22, 0xe6
+; GCN-NEXT: v_mov_b32_e32 v23, 0xf0
+; GCN-NEXT: v_mov_b32_e32 v24, 0xfa
+; GCN-NEXT: v_mov_b32_e32 v25, 0x104
+; GCN-NEXT: v_mov_b32_e32 v26, 0x10e
+; GCN-NEXT: v_mov_b32_e32 v27, 0x118
+; GCN-NEXT: v_mov_b32_e32 v28, 0x122
+; GCN-NEXT: v_mov_b32_e32 v29, 0x12c
+; GCN-NEXT: v_mov_b32_e32 v30, 0x136
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_endpgm
call void @too_many_args_use_workitem_id_x_stack_yz(
i32 10, i32 20, i32 30, i32 40,
i32 50, i32 60, i32 70, i32 80,
@@ -762,30 +1424,61 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz()
i32 290, i32 300, i32 310)
ret void
}
+; GCN: .amdhsa_system_vgpr_workitem_id 2
declare hidden void @extern_hint(i32) #2
; Workitem IDs should not be passed due to the attribute
-; GCN-LABEL: {{^}}kern_call_no_workitem_id_hints:
-; GCN-NOT: v30
-; GCN-NOT: v31
-; GCN: v_mov_b32_e32 v0, 9
-; GCN-NOT: v0
-; GCN-NOT: v31
-; GCN: s_swappc_b64
define amdgpu_kernel void @kern_call_no_workitem_id_hints() #2 {
+; GCN-LABEL: kern_call_no_workitem_id_hints:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_add_i32 s12, s12, s17
+; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GCN-NEXT: s_add_u32 s0, s0, s17
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
+; GCN-NEXT: s_mov_b32 s13, s15
+; GCN-NEXT: s_mov_b32 s12, s14
+; GCN-NEXT: s_getpc_b64 s[18:19]
+; GCN-NEXT: s_add_u32 s18, s18, extern_hint@rel32@lo+4
+; GCN-NEXT: s_addc_u32 s19, s19, extern_hint@rel32@hi+12
+; GCN-NEXT: s_mov_b32 s14, s16
+; GCN-NEXT: v_mov_b32_e32 v0, 9
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GCN-NEXT: s_endpgm
call void @extern_hint(i32 9)
ret void
}
-; GCN-LABEL: {{^}}func_call_no_workitem_id_hints:
-; GCN-NOT: v30
-; GCN-NOT: v31
-; GCN: v_mov_b32_e32 v0, 9
-; GCN-NOT: v0
-; GCN-NOT: v31
-; GCN: s_swappc_b64
define void @func_call_no_workitem_id_hints() #2 {
+; GCN-LABEL: func_call_no_workitem_id_hints:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s16, s33
+; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1
+; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[18:19]
+; GCN-NEXT: v_writelane_b32 v40, s16, 2
+; GCN-NEXT: s_addk_i32 s32, 0x400
+; GCN-NEXT: v_writelane_b32 v40, s30, 0
+; GCN-NEXT: s_getpc_b64 s[16:17]
+; GCN-NEXT: s_add_u32 s16, s16, extern_hint@rel32@lo+4
+; GCN-NEXT: s_addc_u32 s17, s17, extern_hint@rel32@hi+12
+; GCN-NEXT: v_mov_b32_e32 v0, 9
+; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
+; GCN-NEXT: s_mov_b32 s32, s33
+; GCN-NEXT: v_readlane_b32 s4, v40, 2
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b32 s33, s4
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
call void @extern_hint(i32 9)
ret void
}
@@ -794,14 +1487,24 @@ declare hidden void @extern_nohint(i32)
; Check that the hint is respected on the callsite, not the function
; declaration
-; GCN-LABEL: {{^}}kern_callsite_workitem_id_hints:
-; GCN-NOT: v30
-; GCN-NOT: v31
-; GCN: v_mov_b32_e32 v0, 9
-; GCN-NOT: v0
-; GCN-NOT: v31
-; GCN: s_swappc_b64
define amdgpu_kernel void @kern_callsite_workitem_id_hints() #2 {
+; GCN-LABEL: kern_callsite_workitem_id_hints:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_add_i32 s12, s12, s17
+; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GCN-NEXT: s_add_u32 s0, s0, s17
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
+; GCN-NEXT: s_mov_b32 s13, s15
+; GCN-NEXT: s_mov_b32 s12, s14
+; GCN-NEXT: s_getpc_b64 s[18:19]
+; GCN-NEXT: s_add_u32 s18, s18, extern_nohint@rel32@lo+4
+; GCN-NEXT: s_addc_u32 s19, s19, extern_nohint@rel32@hi+12
+; GCN-NEXT: s_mov_b32 s14, s16
+; GCN-NEXT: v_mov_b32_e32 v0, 9
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GCN-NEXT: s_endpgm
call void @extern_nohint(i32 9) #2
ret void
}
More information about the llvm-commits
mailing list