[llvm] [AMDGPU] callee-special-input-vgprs.ll / callee-special-input-vgprs-packed.ll - regenerate test coverage (PR #159587)

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 18 07:41:13 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Simon Pilgrim (RKSimon)

<details>
<summary>Changes</summary>



---

Patch is 153.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/159587.diff


2 Files Affected:

- (modified) llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll (+1657-315) 
- (modified) llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll (+1042-339) 


``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll
index d05424ffe773d..fccee3da6d77e 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll
@@ -1,53 +1,94 @@
-; RUN: opt -passes=amdgpu-attributor -mcpu=kaveri < %s | llc -mcpu=gfx90a -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,GFX7,UNPACKED-TID %s
-; RUN: opt -passes=amdgpu-attributor -mcpu=gfx90a -mattr=-xnack < %s | llc -mcpu=gfx90a -mattr=-xnack -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A,PACKED-TID %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=amdgpu-attributor -mcpu=kaveri < %s | llc -mcpu=gfx90a -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,GFX7 %s
+; RUN: opt -passes=amdgpu-attributor -mcpu=gfx90a -mattr=-xnack < %s | llc -mcpu=gfx90a -mattr=-xnack -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A %s
 
 target triple = "amdgcn-amd-amdhsa"
 
-; GCN-LABEL: {{^}}use_workitem_id_x:
-; GCN: s_waitcnt
-; GCN: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v31
-; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
 define void @use_workitem_id_x() #1 {
+; GFX7-LABEL: use_workitem_id_x:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_x:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
   %val = call i32 @llvm.amdgcn.workitem.id.x()
   store volatile i32 %val, ptr addrspace(1) poison
   ret void
 }
 
-; GCN-LABEL: {{^}}use_workitem_id_y:
-; GCN: s_waitcnt
-; GCN: v_bfe_u32 [[ID:v[0-9]+]], v31, 10, 10
-; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
 define void @use_workitem_id_y() #1 {
+; GFX7-LABEL: use_workitem_id_y:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_bfe_u32 v0, v31, 10, 10
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_y:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_bfe_u32 v0, v31, 10, 10
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
   %val = call i32 @llvm.amdgcn.workitem.id.y()
   store volatile i32 %val, ptr addrspace(1) poison
   ret void
 }
 
-; GCN-LABEL: {{^}}use_workitem_id_z:
-; GCN: s_waitcnt
-; GCN: v_bfe_u32 [[ID:v[0-9]+]], v31, 20, 10
-; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
 define void @use_workitem_id_z() #1 {
+; GFX7-LABEL: use_workitem_id_z:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_bfe_u32 v0, v31, 20, 10
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_z:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_bfe_u32 v0, v31, 20, 10
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
   %val = call i32 @llvm.amdgcn.workitem.id.z()
   store volatile i32 %val, ptr addrspace(1) poison
   ret void
 }
 
-; GCN-LABEL: {{^}}use_workitem_id_xy:
-; GCN: s_waitcnt
-; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
-; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
 define void @use_workitem_id_xy() #1 {
+; GFX7-LABEL: use_workitem_id_xy:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    v_bfe_u32 v0, v31, 10, 10
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_xy:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_bfe_u32 v0, v31, 10, 10
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
   %val0 = call i32 @llvm.amdgcn.workitem.id.x()
   %val1 = call i32 @llvm.amdgcn.workitem.id.y()
   store volatile i32 %val0, ptr addrspace(1) poison
@@ -55,17 +96,34 @@ define void @use_workitem_id_xy() #1 {
   ret void
 }
 
-; GCN-LABEL: {{^}}use_workitem_id_xyz:
-; GCN: s_waitcnt
-; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
-; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
-; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
 define void @use_workitem_id_xyz() #1 {
+; GFX7-LABEL: use_workitem_id_xyz:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    v_bfe_u32 v0, v31, 10, 10
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    v_bfe_u32 v0, v31, 20, 10
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_xyz:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_bfe_u32 v0, v31, 10, 10
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_bfe_u32 v0, v31, 20, 10
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
   %val0 = call i32 @llvm.amdgcn.workitem.id.x()
   %val1 = call i32 @llvm.amdgcn.workitem.id.y()
   %val2 = call i32 @llvm.amdgcn.workitem.id.z()
@@ -75,15 +133,28 @@ define void @use_workitem_id_xyz() #1 {
   ret void
 }
 
-; GCN-LABEL: {{^}}use_workitem_id_xz:
-; GCN: s_waitcnt
-; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
-; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
 define void @use_workitem_id_xz() #1 {
+; GFX7-LABEL: use_workitem_id_xz:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    v_bfe_u32 v0, v31, 20, 10
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_xz:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_bfe_u32 v0, v31, 20, 10
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
   %val0 = call i32 @llvm.amdgcn.workitem.id.x()
   %val1 = call i32 @llvm.amdgcn.workitem.id.z()
   store volatile i32 %val0, ptr addrspace(1) poison
@@ -91,15 +162,28 @@ define void @use_workitem_id_xz() #1 {
   ret void
 }
 
-; GCN-LABEL: {{^}}use_workitem_id_yz:
-; GCN: s_waitcnt
-; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
-; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
 define void @use_workitem_id_yz() #1 {
+; GFX7-LABEL: use_workitem_id_yz:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_bfe_u32 v0, v31, 10, 10
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    v_bfe_u32 v0, v31, 20, 10
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_yz:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_bfe_u32 v0, v31, 10, 10
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_bfe_u32 v0, v31, 20, 10
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
   %val0 = call i32 @llvm.amdgcn.workitem.id.y()
   %val1 = call i32 @llvm.amdgcn.workitem.id.z()
   store volatile i32 %val0, ptr addrspace(1) poison
@@ -107,229 +191,639 @@ define void @use_workitem_id_yz() #1 {
   ret void
 }
 
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_x:
-; GCN: v_mov_b32_e32 v31, v0
-; GCN: s_swappc_b64
-; GCN-NOT: v31
-
-; GCN: .amdhsa_system_vgpr_workitem_id 0
 define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
+; GCN-LABEL: kern_indirect_use_workitem_id_x:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_add_u32 s0, s0, s5
+; GCN-NEXT:    s_addc_u32 s1, s1, 0
+; GCN-NEXT:    s_getpc_b64 s[4:5]
+; GCN-NEXT:    s_add_u32 s4, s4, use_workitem_id_x@gotpcrel32@lo+4
+; GCN-NEXT:    s_addc_u32 s5, s5, use_workitem_id_x@gotpcrel32@hi+12
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT:    v_mov_b32_e32 v31, v0
+; GCN-NEXT:    s_mov_b32 s32, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT:    s_endpgm
   call void @use_workitem_id_x()
   ret void
 }
+; GCN: .amdhsa_system_vgpr_workitem_id 0
 
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y:
-
-; GCN-NOT: v0
-; GCN-NOT: v1
-; GCN-NOT: v31
-; PACKED-TID: v_mov_b32_e32 v31, v0
-; UNPACKED-TID: v_lshlrev_b32_e32 v31, 10, v1
-; GCN-NOT: v0
-; GCN-NOT: v1
-; GCN: s_swappc_b64
-
-; GCN: .amdhsa_system_vgpr_workitem_id 1
 define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
+; GFX7-LABEL: kern_indirect_use_workitem_id_y:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_add_u32 s0, s0, s5
+; GFX7-NEXT:    s_addc_u32 s1, s1, 0
+; GFX7-NEXT:    s_getpc_b64 s[4:5]
+; GFX7-NEXT:    s_add_u32 s4, s4, use_workitem_id_y@gotpcrel32@lo+4
+; GFX7-NEXT:    s_addc_u32 s5, s5, use_workitem_id_y@gotpcrel32@hi+12
+; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT:    v_lshlrev_b32_e32 v31, 10, v1
+; GFX7-NEXT:    s_mov_b32 s32, 0
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT:    s_endpgm
+;
+; GFX90A-LABEL: kern_indirect_use_workitem_id_y:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_add_u32 s0, s0, s5
+; GFX90A-NEXT:    s_addc_u32 s1, s1, 0
+; GFX90A-NEXT:    s_getpc_b64 s[4:5]
+; GFX90A-NEXT:    s_add_u32 s4, s4, use_workitem_id_y@gotpcrel32@lo+4
+; GFX90A-NEXT:    s_addc_u32 s5, s5, use_workitem_id_y@gotpcrel32@hi+12
+; GFX90A-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT:    v_mov_b32_e32 v31, v0
+; GFX90A-NEXT:    s_mov_b32 s32, 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT:    s_endpgm
   call void @use_workitem_id_y()
   ret void
 }
+; GCN: .amdhsa_system_vgpr_workitem_id 1
 
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z:
-
-; GCN-NOT: v0
-; GCN-NOT: v2
-; GCN-NOT: v31
-; PACKED-TID: v_mov_b32_e32 v31, v0
-; UNPACKED-TID: v_lshlrev_b32_e32 v31, 20, v2
-; GCN-NOT: v0
-; GCN-NOT: v1
-; GCN: s_swappc_b64
-
-; GCN: .amdhsa_system_vgpr_workitem_id 2
 define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
+; GFX7-LABEL: kern_indirect_use_workitem_id_z:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_add_u32 s0, s0, s5
+; GFX7-NEXT:    s_addc_u32 s1, s1, 0
+; GFX7-NEXT:    s_getpc_b64 s[4:5]
+; GFX7-NEXT:    s_add_u32 s4, s4, use_workitem_id_z@gotpcrel32@lo+4
+; GFX7-NEXT:    s_addc_u32 s5, s5, use_workitem_id_z@gotpcrel32@hi+12
+; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT:    v_lshlrev_b32_e32 v31, 20, v2
+; GFX7-NEXT:    s_mov_b32 s32, 0
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT:    s_endpgm
+;
+; GFX90A-LABEL: kern_indirect_use_workitem_id_z:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_add_u32 s0, s0, s5
+; GFX90A-NEXT:    s_addc_u32 s1, s1, 0
+; GFX90A-NEXT:    s_getpc_b64 s[4:5]
+; GFX90A-NEXT:    s_add_u32 s4, s4, use_workitem_id_z@gotpcrel32@lo+4
+; GFX90A-NEXT:    s_addc_u32 s5, s5, use_workitem_id_z@gotpcrel32@hi+12
+; GFX90A-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT:    v_mov_b32_e32 v31, v0
+; GFX90A-NEXT:    s_mov_b32 s32, 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT:    s_endpgm
   call void @use_workitem_id_z()
   ret void
 }
+; GCN: .amdhsa_system_vgpr_workitem_id 2
 
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xy:
-; GCN-NOT: v0
-; GCN-NOT: v1
-; PACKED-TID: v_mov_b32_e32 v31, v0
-; UNPACKED-TID: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
-; UNPACKED-TID: v_or_b32_e32 v31, v0, [[IDY]]
-; GCN-NOT: v0
-; GCN-NOT: v1
-; GCN: s_swappc_b64
 define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
+; GFX7-LABEL: kern_indirect_use_workitem_id_xy:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_add_u32 s0, s0, s5
+; GFX7-NEXT:    s_addc_u32 s1, s1, 0
+; GFX7-NEXT:    s_getpc_b64 s[4:5]
+; GFX7-NEXT:    s_add_u32 s4, s4, use_workitem_id_xy@gotpcrel32@lo+4
+; GFX7-NEXT:    s_addc_u32 s5, s5, use_workitem_id_xy@gotpcrel32@hi+12
+; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GFX7-NEXT:    v_or_b32_e32 v31, v0, v1
+; GFX7-NEXT:    s_mov_b32 s32, 0
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT:    s_endpgm
+;
+; GFX90A-LABEL: kern_indirect_use_workitem_id_xy:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_add_u32 s0, s0, s5
+; GFX90A-NEXT:    s_addc_u32 s1, s1, 0
+; GFX90A-NEXT:    s_getpc_b64 s[4:5]
+; GFX90A-NEXT:    s_add_u32 s4, s4, use_workitem_id_xy@gotpcrel32@lo+4
+; GFX90A-NEXT:    s_addc_u32 s5, s5, use_workitem_id_xy@gotpcrel32@hi+12
+; GFX90A-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT:    v_mov_b32_e32 v31, v0
+; GFX90A-NEXT:    s_mov_b32 s32, 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT:    s_endpgm
   call void @use_workitem_id_xy()
   ret void
 }
 
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xz:
-; GCN-NOT: v0
-; GCN-NOT: v2
-
-; PACKED-TID: v_mov_b32_e32 v31, v0
-; UNPACKED-TID: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
-; UNPACKED-TID: v_or_b32_e32 v31, v0, [[IDZ]]
-; GCN-NOT: v0
-; GCN-NOT: v2
-; GCN: s_swappc_b64
 define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
+; GFX7-LABEL: kern_indirect_use_workitem_id_xz:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_add_u32 s0, s0, s5
+; GFX7-NEXT:    s_addc_u32 s1, s1, 0
+; GFX7-NEXT:    s_getpc_b64 s[4:5]
+; GFX7-NEXT:    s_add_u32 s4, s4, use_workitem_id_xz@gotpcrel32@lo+4
+; GFX7-NEXT:    s_addc_u32 s5, s5, use_workitem_id_xz@gotpcrel32@hi+12
+; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
+; GFX7-NEXT:    v_or_b32_e32 v31, v0, v1
+; GFX7-NEXT:    s_mov_b32 s32, 0
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT:    s_endpgm
+;
+; GFX90A-LABEL: kern_indirect_use_workitem_id_xz:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_add_u32 s0, s0, s5
+; GFX90A-NEXT:    s_addc_u32 s1, s1, 0
+; GFX90A-NEXT:    s_getpc_b64 s[4:5]
+; GFX90A-NEXT:    s_add_u32 s4, s4, use_workitem_id_xz@gotpcrel32@lo+4
+; GFX90A-NEXT:    s_addc_u32 s5, s5, use_workitem_id_xz@gotpcrel32@hi+12
+; GFX90A-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT:    v_mov_b32_e32 v31, v0
+; GFX90A-NEXT:    s_mov_b32 s32, 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT:    s_endpgm
   call void @use_workitem_id_xz()
   ret void
 }
 
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_yz:
-; GCN-NOT: v1
-; GCN-NOT: v2
-; PACKED-TID: v_mov_b32_e32 v31, v0
-; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
-; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
-; UNPACKED-TID: v_or_b32_e32 v31, [[IDY]], [[IDZ]]
-; GCN-NOT: v1
-; GCN-NOT: v2
-; GCN: s_swappc_b64
 define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 {
+; GFX7-LABEL: kern_indirect_use_workitem_id_yz:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_add_u32 s0, s0, s5
+; GFX7-NEXT:    s_addc_u32 s1, s1, 0
+; GFX7-NEXT:    s_getpc_b64 s[4:5]
+; GFX7-NEXT:    s_add_u32 s4, s4, use_workitem_id_yz@gotpcrel32@lo+4
+; GFX7-NEXT:    s_addc_u32 s5, s5, use_workitem_id_yz@gotpcrel32@hi+12
+; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 20, v2
+; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GFX7-NEXT:    v_or_b32_e32 v31, v1, v0
+; GFX7-NEXT:    s_mov_b32 s32, 0
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT:    s_endpgm
+;
+; GFX90A-LABEL: kern_indirect_use_workitem_id_yz:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_add_u32 s0, s0, s5
+; GFX90A-NEXT:    s_addc_u32 s1, s1, 0
+; GFX90A-NEXT:    s_getpc_b64 s[4:5]
+; GFX90A-NEXT:    s_add_u32 s4, s4, use_workitem_id_yz@gotpcrel32@lo+4
+; GFX90A-NEXT:    s_addc_u32 s5, s5, use_workitem_id_yz@gotpcrel32@hi+12
+; GFX90A-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT:    v_mov_b32_e32 v31, v0
+; GFX90A-NEXT:    s_mov_b32 s32, 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT:    s_endpgm
   call void @use_workitem_id_yz()
   ret void
 }
 
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xyz:
-; GCN-NOT: v0
-; GCN-NOT: v1
-; GCN-NOT: v2
-
-; PACKED-TID: v_mov_b32_e32 v31, v0
-
-; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
-; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
-; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, [[IDY]]
-; UNPACKED-TID-DAG: v_or_b32_e32 v31, v0, [[IDZ]]
-; GCN-NOT: v0
-; GCN-NOT: v1
-; GCN-NOT: v2
-; GCN: s_swappc_b64
 define amdgpu_kernel void @kern_indirect_use_workitem_id_xyz() #1 {
+; GFX7-LABEL: kern_indirect_use_workitem_id_xyz:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_add_u32 s0, s0, s5
+; GFX7-NEXT:    s_addc_u32 s1, s1, 0
+; GFX7-NEXT:    s_getpc_b64 s[4:5]
+; GFX7-NEXT:    s_add_u32 s4, s4, use_workitem_id_xyz@gotpcrel32@lo+4
+; GFX7-NEXT:    s_addc_u32 s5, s5, use_workitem_id_xyz@gotpcrel32@hi+12
+; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX7-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7-NEXT:    s_mov_b32 s32, 0
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT:    s_endpgm
+;
+; GFX90A-LABEL:...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/159587


More information about the llvm-commits mailing list