[llvm] df57833 - [AMDGPU] Generate checks for llvm.amdgcn.is.private/shared (#103859)

via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 14 05:23:36 PDT 2024


Author: Jay Foad
Date: 2024-08-14T13:23:33+01:00
New Revision: df57833ea8a3f527b7941576b4a130ddd4361e61

URL: https://github.com/llvm/llvm-project/commit/df57833ea8a3f527b7941576b4a130ddd4361e61
DIFF: https://github.com/llvm/llvm-project/commit/df57833ea8a3f527b7941576b4a130ddd4361e61.diff

LOG: [AMDGPU] Generate checks for llvm.amdgcn.is.private/shared (#103859)

Also combine the GlobalISel tests into the SelectionDAG ones.

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll

Removed: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
deleted file mode 100644
index b0c6e89380d810..00000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
+++ /dev/null
@@ -1,153 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=CI %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
-
-; TODO: Merge with DAG test
-
-define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) {
-; CI-LABEL: is_private_vgpr:
-; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; CI-NEXT:    s_load_dword s2, s[6:7], 0x32
-; CI-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
-; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    v_mov_b32_e32 v0, s0
-; CI-NEXT:    v_mov_b32_e32 v1, s1
-; CI-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; CI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1] glc
-; CI-NEXT:    s_waitcnt vmcnt(0)
-; CI-NEXT:    v_cmp_eq_u32_e32 vcc, s2, v1
-; CI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; CI-NEXT:    flat_store_dword v[0:1], v0
-; CI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: is_private_vgpr:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dwordx2 v[0:1], v0, s[0:1] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, s1, v1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX9-NEXT:    global_store_dword v[0:1], v0, off
-; GFX9-NEXT:    s_endpgm
-;
-; GFX10-LABEL: is_private_vgpr:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
-; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX10-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, s1, v1
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX10-NEXT:    global_store_dword v[0:1], v0, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: is_private_vgpr:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b64 v[0:1], v0, s[0:1] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, s1, v1
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
-; GFX11-NEXT:    s_nop 0
-; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-NEXT:    s_endpgm
-  %id = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr inbounds ptr, ptr addrspace(1) %ptr.ptr, i32 %id
-  %ptr = load volatile ptr, ptr addrspace(1) %gep
-  %val = call i1 @llvm.amdgcn.is.private(ptr %ptr)
-  %ext = zext i1 %val to i32
-  store i32 %ext, ptr addrspace(1) undef
-  ret void
-}
-
-define amdgpu_kernel void @is_private_sgpr(ptr %ptr) {
-; CI-LABEL: is_private_sgpr:
-; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_load_dword s0, s[6:7], 0x32
-; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_cmp_lg_u32 s1, s0
-; CI-NEXT:    s_cbranch_scc1 .LBB1_2
-; CI-NEXT:  ; %bb.1: ; %bb0
-; CI-NEXT:    v_mov_b32_e32 v0, 0
-; CI-NEXT:    flat_store_dword v[0:1], v0
-; CI-NEXT:    s_waitcnt vmcnt(0)
-; CI-NEXT:  .LBB1_2: ; %bb1
-; CI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: is_private_sgpr:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; GFX9-NEXT:    s_mov_b64 s[2:3], src_private_base
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_cmp_lg_u32 s1, s3
-; GFX9-NEXT:    s_cbranch_scc1 .LBB1_2
-; GFX9-NEXT:  ; %bb.1: ; %bb0
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9-NEXT:    global_store_dword v[0:1], v0, off
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:  .LBB1_2: ; %bb1
-; GFX9-NEXT:    s_endpgm
-;
-; GFX10-LABEL: is_private_sgpr:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; GFX10-NEXT:    s_mov_b64 s[2:3], src_private_base
-; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    s_cmp_lg_u32 s1, s3
-; GFX10-NEXT:    s_cbranch_scc1 .LBB1_2
-; GFX10-NEXT:  ; %bb.1: ; %bb0
-; GFX10-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10-NEXT:    global_store_dword v[0:1], v0, off
-; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:  .LBB1_2: ; %bb1
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: is_private_sgpr:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
-; GFX11-NEXT:    s_mov_b64 s[2:3], src_private_base
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_cmp_lg_u32 s1, s3
-; GFX11-NEXT:    s_cbranch_scc1 .LBB1_2
-; GFX11-NEXT:  ; %bb.1: ; %bb0
-; GFX11-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11-NEXT:    global_store_b32 v[0:1], v0, off dlc
-; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:  .LBB1_2: ; %bb1
-; GFX11-NEXT:    s_endpgm
-  %val = call i1 @llvm.amdgcn.is.private(ptr %ptr)
-  br i1 %val, label %bb0, label %bb1
-
-bb0:
-  store volatile i32 0, ptr addrspace(1) undef
-  br label %bb1
-
-bb1:
-  ret void
-}
-
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i1 @llvm.amdgcn.is.private(ptr nocapture) #0
-
-attributes #0 = { nounwind readnone speculatable }
-
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
deleted file mode 100644
index bbcb807a956bee..00000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
+++ /dev/null
@@ -1,153 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=CI %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
-
-; TODO: Merge with DAG test
-
-define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
-; CI-LABEL: is_local_vgpr:
-; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; CI-NEXT:    s_load_dword s2, s[6:7], 0x33
-; CI-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
-; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    v_mov_b32_e32 v0, s0
-; CI-NEXT:    v_mov_b32_e32 v1, s1
-; CI-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; CI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1] glc
-; CI-NEXT:    s_waitcnt vmcnt(0)
-; CI-NEXT:    v_cmp_eq_u32_e32 vcc, s2, v1
-; CI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; CI-NEXT:    flat_store_dword v[0:1], v0
-; CI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: is_local_vgpr:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dwordx2 v[0:1], v0, s[0:1] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_mov_b64 s[0:1], src_shared_base
-; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, s1, v1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX9-NEXT:    global_store_dword v[0:1], v0, off
-; GFX9-NEXT:    s_endpgm
-;
-; GFX10-LABEL: is_local_vgpr:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
-; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX10-NEXT:    s_mov_b64 s[0:1], src_shared_base
-; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, s1, v1
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX10-NEXT:    global_store_dword v[0:1], v0, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: is_local_vgpr:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b64 v[0:1], v0, s[0:1] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    s_mov_b64 s[0:1], src_shared_base
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, s1, v1
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
-; GFX11-NEXT:    s_nop 0
-; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-NEXT:    s_endpgm
-  %id = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr inbounds ptr, ptr addrspace(1) %ptr.ptr, i32 %id
-  %ptr = load volatile ptr, ptr addrspace(1) %gep
-  %val = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
-  %ext = zext i1 %val to i32
-  store i32 %ext, ptr addrspace(1) undef
-  ret void
-}
-
-define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
-; CI-LABEL: is_local_sgpr:
-; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_load_dword s0, s[6:7], 0x33
-; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_cmp_lg_u32 s1, s0
-; CI-NEXT:    s_cbranch_scc1 .LBB1_2
-; CI-NEXT:  ; %bb.1: ; %bb0
-; CI-NEXT:    v_mov_b32_e32 v0, 0
-; CI-NEXT:    flat_store_dword v[0:1], v0
-; CI-NEXT:    s_waitcnt vmcnt(0)
-; CI-NEXT:  .LBB1_2: ; %bb1
-; CI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: is_local_sgpr:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; GFX9-NEXT:    s_mov_b64 s[2:3], src_shared_base
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_cmp_lg_u32 s1, s3
-; GFX9-NEXT:    s_cbranch_scc1 .LBB1_2
-; GFX9-NEXT:  ; %bb.1: ; %bb0
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9-NEXT:    global_store_dword v[0:1], v0, off
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:  .LBB1_2: ; %bb1
-; GFX9-NEXT:    s_endpgm
-;
-; GFX10-LABEL: is_local_sgpr:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; GFX10-NEXT:    s_mov_b64 s[2:3], src_shared_base
-; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    s_cmp_lg_u32 s1, s3
-; GFX10-NEXT:    s_cbranch_scc1 .LBB1_2
-; GFX10-NEXT:  ; %bb.1: ; %bb0
-; GFX10-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10-NEXT:    global_store_dword v[0:1], v0, off
-; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:  .LBB1_2: ; %bb1
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: is_local_sgpr:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
-; GFX11-NEXT:    s_mov_b64 s[2:3], src_shared_base
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_cmp_lg_u32 s1, s3
-; GFX11-NEXT:    s_cbranch_scc1 .LBB1_2
-; GFX11-NEXT:  ; %bb.1: ; %bb0
-; GFX11-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11-NEXT:    global_store_b32 v[0:1], v0, off dlc
-; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:  .LBB1_2: ; %bb1
-; GFX11-NEXT:    s_endpgm
-  %val = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
-  br i1 %val, label %bb0, label %bb1
-
-bb0:
-  store volatile i32 0, ptr addrspace(1) undef
-  br label %bb1
-
-bb1:
-  ret void
-}
-
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #0
-
-attributes #0 = { nounwind readnone speculatable }
-
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
index 0076079ce17c77..42e8b2608dc1c0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
@@ -1,18 +1,106 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIT %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIH %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-GISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
 
-; GCN-LABEL: {{^}}is_private_vgpr:
-; GCN-DAG: {{flat|global|buffer}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]]
-; CI-DAG: s_load_dwordx2 s[0:1], s[6:7], 0x0
-; CIT: v_cmp_eq_u32_e32 vcc, s4, v[[PTR_HI]]
-; CIH: v_cmp_eq_u32_e32 vcc, s2, v[[PTR_HI]]
-
-; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_private_base
-; GFX9: v_cmp_eq_u32_e32 vcc, s[[HI]], v[[PTR_HI]]
-
-; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
 define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) {
+; SI-LABEL: is_private_vgpr:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; SI-NEXT:    s_load_dword s4, s[6:7], 0x32
+; SI-NEXT:    s_mov_b32 s2, 0
+; SI-NEXT:    s_mov_b32 s3, 0x100f000
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[0:3], 0 addr64 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
+; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
+;
+; CI-SDAG-LABEL: is_private_vgpr:
+; CI-SDAG:       ; %bb.0:
+; CI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; CI-SDAG-NEXT:    s_load_dword s2, s[6:7], 0x32
+; CI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
+; CI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; CI-SDAG-NEXT:    v_add_i32_e32 v0, vcc, s0, v0
+; CI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CI-SDAG-NEXT:    flat_load_dwordx2 v[0:1], v[0:1] glc
+; CI-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; CI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, s2, v1
+; CI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; CI-SDAG-NEXT:    flat_store_dword v[0:1], v0
+; CI-SDAG-NEXT:    s_endpgm
+;
+; GFX9-LABEL: is_private_vgpr:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dwordx2 v[0:1], v0, s[0:1] glc
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, s1, v1
+; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-NEXT:    global_store_dword v[0:1], v0, off
+; GFX9-NEXT:    s_endpgm
+;
+; CI-GISEL-LABEL: is_private_vgpr:
+; CI-GISEL:       ; %bb.0:
+; CI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; CI-GISEL-NEXT:    s_load_dword s2, s[6:7], 0x32
+; CI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
+; CI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; CI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; CI-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CI-GISEL-NEXT:    flat_load_dwordx2 v[0:1], v[0:1] glc
+; CI-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; CI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s2, v1
+; CI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; CI-GISEL-NEXT:    flat_store_dword v[0:1], v0
+; CI-GISEL-NEXT:    s_endpgm
+;
+; GFX10-LABEL: is_private_vgpr:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
+; GFX10-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, s1, v1
+; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX10-NEXT:    global_store_dword v[0:1], v0, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: is_private_vgpr:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
+; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    global_load_b64 v[0:1], v0, s[0:1] glc dlc
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, s1, v1
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
+; GFX11-NEXT:    s_nop 0
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
   %id = call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds ptr, ptr addrspace(1) %ptr.ptr, i32 %id
   %ptr = load volatile ptr, ptr addrspace(1) %gep
@@ -24,20 +112,113 @@ define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) {
 
 ; FIXME: setcc (zero_extend (setcc)), 1) not folded out, resulting in
 ; select and vcc branch.
-
-; GCN-LABEL: {{^}}is_private_sgpr:
-; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[6:7], 0x1{{$}}
-
-; CI-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[6:7], 0x32{{$}}
-; GFX9-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[6:7], 0x4{{$}}
-
-; CI: s_cmp_eq_u32 [[APERTURE]], [[PTR_HI]]
-
-; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_private_base
-; GFX9: s_cmp_eq_u32 [[PTR_HI]], s[[HI]]
-
-; GCN: s_cbranch_vccnz
 define amdgpu_kernel void @is_private_sgpr(ptr %ptr) {
+; SI-LABEL: is_private_sgpr:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s0, s[6:7], 0x1
+; SI-NEXT:    s_load_dword s1, s[6:7], 0x32
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_cmp_eq_u32 s0, s1
+; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; SI-NEXT:    s_andn2_b64 vcc, exec, s[0:1]
+; SI-NEXT:    s_cbranch_vccnz .LBB1_2
+; SI-NEXT:  ; %bb.1: ; %bb0
+; SI-NEXT:    s_mov_b32 s3, 0x100f000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    v_mov_b32_e32 v0, 0
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:  .LBB1_2: ; %bb1
+; SI-NEXT:    s_endpgm
+;
+; CI-SDAG-LABEL: is_private_sgpr:
+; CI-SDAG:       ; %bb.0:
+; CI-SDAG-NEXT:    s_load_dword s0, s[6:7], 0x1
+; CI-SDAG-NEXT:    s_load_dword s1, s[6:7], 0x32
+; CI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-SDAG-NEXT:    s_cmp_eq_u32 s0, s1
+; CI-SDAG-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; CI-SDAG-NEXT:    s_andn2_b64 vcc, exec, s[0:1]
+; CI-SDAG-NEXT:    s_cbranch_vccnz .LBB1_2
+; CI-SDAG-NEXT:  ; %bb.1: ; %bb0
+; CI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; CI-SDAG-NEXT:    flat_store_dword v[0:1], v0
+; CI-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; CI-SDAG-NEXT:  .LBB1_2: ; %bb1
+; CI-SDAG-NEXT:    s_endpgm
+;
+; GFX9-SDAG-LABEL: is_private_sgpr:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dword s2, s[6:7], 0x4
+; GFX9-SDAG-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    s_cmp_eq_u32 s2, s1
+; GFX9-SDAG-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; GFX9-SDAG-NEXT:    s_andn2_b64 vcc, exec, s[0:1]
+; GFX9-SDAG-NEXT:    s_cbranch_vccnz .LBB1_2
+; GFX9-SDAG-NEXT:  ; %bb.1: ; %bb0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-SDAG-NEXT:    global_store_dword v[0:1], v0, off
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:  .LBB1_2: ; %bb1
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; CI-GISEL-LABEL: is_private_sgpr:
+; CI-GISEL:       ; %bb.0:
+; CI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; CI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-GISEL-NEXT:    s_load_dword s0, s[6:7], 0x32
+; CI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-GISEL-NEXT:    s_cmp_lg_u32 s1, s0
+; CI-GISEL-NEXT:    s_cbranch_scc1 .LBB1_2
+; CI-GISEL-NEXT:  ; %bb.1: ; %bb0
+; CI-GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; CI-GISEL-NEXT:    flat_store_dword v[0:1], v0
+; CI-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; CI-GISEL-NEXT:  .LBB1_2: ; %bb1
+; CI-GISEL-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: is_private_sgpr:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; GFX9-GISEL-NEXT:    s_mov_b64 s[2:3], src_private_base
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    s_cmp_lg_u32 s1, s3
+; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB1_2
+; GFX9-GISEL-NEXT:  ; %bb.1: ; %bb0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GISEL-NEXT:    global_store_dword v[0:1], v0, off
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:  .LBB1_2: ; %bb1
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX10-LABEL: is_private_sgpr:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; GFX10-NEXT:    s_mov_b64 s[2:3], src_private_base
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_cmp_lg_u32 s1, s3
+; GFX10-NEXT:    s_cbranch_scc1 .LBB1_2
+; GFX10-NEXT:  ; %bb.1: ; %bb0
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-NEXT:    global_store_dword v[0:1], v0, off
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:  .LBB1_2: ; %bb1
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: is_private_sgpr:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
+; GFX11-NEXT:    s_mov_b64 s[2:3], src_private_base
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    s_cmp_lg_u32 s1, s3
+; GFX11-NEXT:    s_cbranch_scc1 .LBB1_2
+; GFX11-NEXT:  ; %bb.1: ; %bb0
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-NEXT:    global_store_b32 v[0:1], v0, off dlc
+; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT:  .LBB1_2: ; %bb1
+; GFX11-NEXT:    s_endpgm
   %val = call i1 @llvm.amdgcn.is.private(ptr %ptr)
   br i1 %val, label %bb0, label %bb1
 
@@ -49,10 +230,10 @@ bb1:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i1 @llvm.amdgcn.is.private(ptr nocapture) #0
-
-attributes #0 = { nounwind readnone speculatable }
-
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CI: {{.*}}
+; GFX10-GISEL: {{.*}}
+; GFX11-GISEL: {{.*}}
+; SI-SDAG: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
index e24c47991fe3d7..f8e60e5eb09a16 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
@@ -1,18 +1,139 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIT %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIH %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-GISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
 
-; GCN-LABEL: {{^}}is_local_vgpr:
-; GCN-DAG: {{flat|global|buffer}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]]
-; CI-DAG: s_load_dwordx2 s[0:1], s[6:7], 0x0
-
-; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
-; GFX9: v_cmp_eq_u32_e32 vcc, s[[HI]], v[[PTR_HI]]
-
-; CIT: v_cmp_eq_u32_e32 vcc, s4, v[[PTR_HI]]
-; CIH: v_cmp_eq_u32_e32 vcc, s2, v[[PTR_HI]]
-; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
 define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
+; CIT-LABEL: is_local_vgpr:
+; CIT:       ; %bb.0:
+; CIT-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; CIT-NEXT:    s_load_dword s4, s[6:7], 0x33
+; CIT-NEXT:    s_mov_b32 s2, 0
+; CIT-NEXT:    s_mov_b32 s3, 0x100f000
+; CIT-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
+; CIT-NEXT:    v_mov_b32_e32 v1, 0
+; CIT-NEXT:    s_waitcnt lgkmcnt(0)
+; CIT-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[0:3], 0 addr64 glc
+; CIT-NEXT:    s_waitcnt vmcnt(0)
+; CIT-NEXT:    s_mov_b32 s2, -1
+; CIT-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
+; CIT-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; CIT-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; CIT-NEXT:    s_endpgm
+;
+; CIH-LABEL: is_local_vgpr:
+; CIH:       ; %bb.0:
+; CIH-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; CIH-NEXT:    s_load_dword s2, s[6:7], 0x33
+; CIH-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
+; CIH-NEXT:    s_waitcnt lgkmcnt(0)
+; CIH-NEXT:    v_mov_b32_e32 v1, s1
+; CIH-NEXT:    v_add_i32_e32 v0, vcc, s0, v0
+; CIH-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CIH-NEXT:    flat_load_dwordx2 v[0:1], v[0:1] glc
+; CIH-NEXT:    s_waitcnt vmcnt(0)
+; CIH-NEXT:    v_cmp_eq_u32_e32 vcc, s2, v1
+; CIH-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; CIH-NEXT:    flat_store_dword v[0:1], v0
+; CIH-NEXT:    s_endpgm
+;
+; SI-LABEL: is_local_vgpr:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; SI-NEXT:    s_load_dword s4, s[6:7], 0x33
+; SI-NEXT:    s_mov_b32 s2, 0
+; SI-NEXT:    s_mov_b32 s3, 0x100f000
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[0:3], 0 addr64 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
+; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
+;
+; CI-SDAG-LABEL: is_local_vgpr:
+; CI-SDAG:       ; %bb.0:
+; CI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; CI-SDAG-NEXT:    s_load_dword s2, s[6:7], 0x33
+; CI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
+; CI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; CI-SDAG-NEXT:    v_add_i32_e32 v0, vcc, s0, v0
+; CI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CI-SDAG-NEXT:    flat_load_dwordx2 v[0:1], v[0:1] glc
+; CI-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; CI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, s2, v1
+; CI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; CI-SDAG-NEXT:    flat_store_dword v[0:1], v0
+; CI-SDAG-NEXT:    s_endpgm
+;
+; GFX9-LABEL: is_local_vgpr:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dwordx2 v[0:1], v0, s[0:1] glc
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_mov_b64 s[0:1], src_shared_base
+; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, s1, v1
+; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-NEXT:    global_store_dword v[0:1], v0, off
+; GFX9-NEXT:    s_endpgm
+;
+; CI-GISEL-LABEL: is_local_vgpr:
+; CI-GISEL:       ; %bb.0:
+; CI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; CI-GISEL-NEXT:    s_load_dword s2, s[6:7], 0x33
+; CI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
+; CI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; CI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; CI-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CI-GISEL-NEXT:    flat_load_dwordx2 v[0:1], v[0:1] glc
+; CI-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; CI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s2, v1
+; CI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; CI-GISEL-NEXT:    flat_store_dword v[0:1], v0
+; CI-GISEL-NEXT:    s_endpgm
+;
+; GFX10-LABEL: is_local_vgpr:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
+; GFX10-NEXT:    s_mov_b64 s[0:1], src_shared_base
+; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, s1, v1
+; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX10-NEXT:    global_store_dword v[0:1], v0, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: is_local_vgpr:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
+; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    global_load_b64 v[0:1], v0, s[0:1] glc dlc
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_mov_b64 s[0:1], src_shared_base
+; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, s1, v1
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
+; GFX11-NEXT:    s_nop 0
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
   %id = call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds ptr, ptr addrspace(1) %ptr.ptr, i32 %id
   %ptr = load volatile ptr, ptr addrspace(1) %gep
@@ -24,19 +145,147 @@ define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
 
 ; FIXME: setcc (zero_extend (setcc)), 1) not folded out, resulting in
 ; select and vcc branch.
-
-; GCN-LABEL: {{^}}is_local_sgpr:
-; CI-DAG: s_load_dword s0, s[6:7], 0x1
-
-; CI-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[6:7], 0x33{{$}}
-; GFX9-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[6:7], 0x4{{$}}
-
-; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
-; GFX9: s_cmp_eq_u32 [[PTR_HI]], s[[HI]]
-
-; CI: s_cmp_eq_u32 s0, [[PTR_HI]]
-; GCN: s_cbranch_vccnz
 define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
+; CIT-LABEL: is_local_sgpr:
+; CIT:       ; %bb.0:
+; CIT-NEXT:    s_load_dword s0, s[6:7], 0x1
+; CIT-NEXT:    s_load_dword s1, s[6:7], 0x33
+; CIT-NEXT:    s_waitcnt lgkmcnt(0)
+; CIT-NEXT:    s_cmp_eq_u32 s0, s1
+; CIT-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; CIT-NEXT:    s_andn2_b64 vcc, exec, s[0:1]
+; CIT-NEXT:    s_cbranch_vccnz .LBB1_2
+; CIT-NEXT:  ; %bb.1: ; %bb0
+; CIT-NEXT:    s_mov_b32 s3, 0x100f000
+; CIT-NEXT:    s_mov_b32 s2, -1
+; CIT-NEXT:    v_mov_b32_e32 v0, 0
+; CIT-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; CIT-NEXT:    s_waitcnt vmcnt(0)
+; CIT-NEXT:  .LBB1_2: ; %bb1
+; CIT-NEXT:    s_endpgm
+;
+; CIH-LABEL: is_local_sgpr:
+; CIH:       ; %bb.0:
+; CIH-NEXT:    s_load_dword s0, s[6:7], 0x1
+; CIH-NEXT:    s_load_dword s1, s[6:7], 0x33
+; CIH-NEXT:    s_waitcnt lgkmcnt(0)
+; CIH-NEXT:    s_cmp_eq_u32 s0, s1
+; CIH-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; CIH-NEXT:    s_andn2_b64 vcc, exec, s[0:1]
+; CIH-NEXT:    s_cbranch_vccnz .LBB1_2
+; CIH-NEXT:  ; %bb.1: ; %bb0
+; CIH-NEXT:    v_mov_b32_e32 v0, 0
+; CIH-NEXT:    flat_store_dword v[0:1], v0
+; CIH-NEXT:    s_waitcnt vmcnt(0)
+; CIH-NEXT:  .LBB1_2: ; %bb1
+; CIH-NEXT:    s_endpgm
+;
+; SI-LABEL: is_local_sgpr:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s0, s[6:7], 0x1
+; SI-NEXT:    s_load_dword s1, s[6:7], 0x33
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_cmp_eq_u32 s0, s1
+; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; SI-NEXT:    s_andn2_b64 vcc, exec, s[0:1]
+; SI-NEXT:    s_cbranch_vccnz .LBB1_2
+; SI-NEXT:  ; %bb.1: ; %bb0
+; SI-NEXT:    s_mov_b32 s3, 0x100f000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    v_mov_b32_e32 v0, 0
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:  .LBB1_2: ; %bb1
+; SI-NEXT:    s_endpgm
+;
+; CI-SDAG-LABEL: is_local_sgpr:
+; CI-SDAG:       ; %bb.0:
+; CI-SDAG-NEXT:    s_load_dword s0, s[6:7], 0x1
+; CI-SDAG-NEXT:    s_load_dword s1, s[6:7], 0x33
+; CI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-SDAG-NEXT:    s_cmp_eq_u32 s0, s1
+; CI-SDAG-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; CI-SDAG-NEXT:    s_andn2_b64 vcc, exec, s[0:1]
+; CI-SDAG-NEXT:    s_cbranch_vccnz .LBB1_2
+; CI-SDAG-NEXT:  ; %bb.1: ; %bb0
+; CI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; CI-SDAG-NEXT:    flat_store_dword v[0:1], v0
+; CI-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; CI-SDAG-NEXT:  .LBB1_2: ; %bb1
+; CI-SDAG-NEXT:    s_endpgm
+;
+; GFX9-SDAG-LABEL: is_local_sgpr:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dword s2, s[6:7], 0x4
+; GFX9-SDAG-NEXT:    s_mov_b64 s[0:1], src_shared_base
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    s_cmp_eq_u32 s2, s1
+; GFX9-SDAG-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; GFX9-SDAG-NEXT:    s_andn2_b64 vcc, exec, s[0:1]
+; GFX9-SDAG-NEXT:    s_cbranch_vccnz .LBB1_2
+; GFX9-SDAG-NEXT:  ; %bb.1: ; %bb0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-SDAG-NEXT:    global_store_dword v[0:1], v0, off
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:  .LBB1_2: ; %bb1
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; CI-GISEL-LABEL: is_local_sgpr:
+; CI-GISEL:       ; %bb.0:
+; CI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; CI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-GISEL-NEXT:    s_load_dword s0, s[6:7], 0x33
+; CI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-GISEL-NEXT:    s_cmp_lg_u32 s1, s0
+; CI-GISEL-NEXT:    s_cbranch_scc1 .LBB1_2
+; CI-GISEL-NEXT:  ; %bb.1: ; %bb0
+; CI-GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; CI-GISEL-NEXT:    flat_store_dword v[0:1], v0
+; CI-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; CI-GISEL-NEXT:  .LBB1_2: ; %bb1
+; CI-GISEL-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: is_local_sgpr:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; GFX9-GISEL-NEXT:    s_mov_b64 s[2:3], src_shared_base
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    s_cmp_lg_u32 s1, s3
+; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB1_2
+; GFX9-GISEL-NEXT:  ; %bb.1: ; %bb0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GISEL-NEXT:    global_store_dword v[0:1], v0, off
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:  .LBB1_2: ; %bb1
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX10-LABEL: is_local_sgpr:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; GFX10-NEXT:    s_mov_b64 s[2:3], src_shared_base
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_cmp_lg_u32 s1, s3
+; GFX10-NEXT:    s_cbranch_scc1 .LBB1_2
+; GFX10-NEXT:  ; %bb.1: ; %bb0
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-NEXT:    global_store_dword v[0:1], v0, off
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:  .LBB1_2: ; %bb1
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: is_local_sgpr:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
+; GFX11-NEXT:    s_mov_b64 s[2:3], src_shared_base
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    s_cmp_lg_u32 s1, s3
+; GFX11-NEXT:    s_cbranch_scc1 .LBB1_2
+; GFX11-NEXT:  ; %bb.1: ; %bb0
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-NEXT:    global_store_b32 v[0:1], v0, off dlc
+; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT:  .LBB1_2: ; %bb1
+; GFX11-NEXT:    s_endpgm
   %val = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
   br i1 %val, label %bb0, label %bb1
 
@@ -48,10 +297,10 @@ bb1:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #0
-
-attributes #0 = { nounwind readnone speculatable }
-
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CI: {{.*}}
+; GFX10-GISEL: {{.*}}
+; GFX11-GISEL: {{.*}}
+; SI-SDAG: {{.*}}


        


More information about the llvm-commits mailing list