[llvm] 6cf37dd - [AMDGPU] Enable architected SGPRs for GFX12 (#79160)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 23 08:36:34 PST 2024
Author: Jay Foad
Date: 2024-01-23T16:36:30Z
New Revision: 6cf37dd504d0466bd9f34e29712d60759ef8180f
URL: https://github.com/llvm/llvm-project/commit/6cf37dd504d0466bd9f34e29712d60759ef8180f
DIFF: https://github.com/llvm/llvm-project/commit/6cf37dd504d0466bd9f34e29712d60759ef8180f.diff
LOG: [AMDGPU] Enable architected SGPRs for GFX12 (#79160)
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPU.td
llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 92985f971f17a75..cb29d5d94759812 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1496,6 +1496,7 @@ def FeatureISAVersion12 : FeatureSet<
FeatureWavefrontSize32,
FeatureShaderCyclesHiLoRegisters,
FeatureArchitectedFlatScratch,
+ FeatureArchitectedSGPRs,
FeatureAtomicFaddRtnInsts,
FeatureAtomicFaddNoRtnInsts,
FeatureAtomicDsPkAdd16Insts,
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
index 9965d214cc9b3fe..380a13ed16128f8 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
@@ -41,30 +41,30 @@ define amdgpu_kernel void @indirect_call_known_no_special_inputs() {
;
; GFX12-LABEL: indirect_call_known_no_special_inputs:
; GFX12: ; %bb.0: ; %bb
+; GFX12-NEXT: s_getpc_b64 s[2:3]
+; GFX12-NEXT: s_sext_i32_i16 s3, s3
+; GFX12-NEXT: s_add_co_u32 s2, s2, snork at gotpcrel32@lo+8
+; GFX12-NEXT: s_add_co_ci_u32 s3, s3, snork at gotpcrel32@hi+16
+; GFX12-NEXT: s_mov_b64 s[0:1], 0
; GFX12-NEXT: s_getpc_b64 s[4:5]
; GFX12-NEXT: s_sext_i32_i16 s5, s5
-; GFX12-NEXT: s_add_co_u32 s4, s4, snork at gotpcrel32@lo+8
-; GFX12-NEXT: s_add_co_ci_u32 s5, s5, snork at gotpcrel32@hi+16
-; GFX12-NEXT: s_mov_b64 s[2:3], 0
-; GFX12-NEXT: s_getpc_b64 s[6:7]
-; GFX12-NEXT: s_sext_i32_i16 s7, s7
-; GFX12-NEXT: s_add_co_u32 s6, s6, wobble at gotpcrel32@lo+8
-; GFX12-NEXT: s_add_co_ci_u32 s7, s7, wobble at gotpcrel32@hi+16
-; GFX12-NEXT: s_load_u8 s1, s[2:3], 0x0
+; GFX12-NEXT: s_add_co_u32 s4, s4, wobble at gotpcrel32@lo+8
+; GFX12-NEXT: s_add_co_ci_u32 s5, s5, wobble at gotpcrel32@hi+16
+; GFX12-NEXT: s_load_u8 s6, s[0:1], 0x0
+; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX12-NEXT: s_load_b64 s[4:5], s[6:7], 0x0
; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 0
; GFX12-NEXT: v_mov_b32_e32 v31, v0
+; GFX12-NEXT: s_mov_b32 s12, ttmp9
; GFX12-NEXT: s_mov_b64 s[8:9], 0
-; GFX12-NEXT: s_mov_b32 s12, s0
; GFX12-NEXT: s_mov_b32 s32, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_and_b32 s1, 1, s1
+; GFX12-NEXT: s_and_b32 s4, 1, s6
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT: s_cmp_eq_u32 s1, 1
-; GFX12-NEXT: s_cselect_b32 s3, s5, s3
-; GFX12-NEXT: s_cselect_b32 s2, s4, s2
-; GFX12-NEXT: s_swappc_b64 s[30:31], s[2:3]
+; GFX12-NEXT: s_cmp_eq_u32 s4, 1
+; GFX12-NEXT: s_cselect_b32 s1, s3, s1
+; GFX12-NEXT: s_cselect_b32 s0, s2, s0
+; GFX12-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX12-NEXT: s_endpgm
bb:
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
index c732ff709425505..495b54758de0493 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-SDAG %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
define amdgpu_cs void @_amdgpu_cs_main() {
; GFX9-SDAG-LABEL: _amdgpu_cs_main:
@@ -23,6 +25,30 @@ define amdgpu_cs void @_amdgpu_cs_main() {
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2
; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: _amdgpu_cs_main:
+; GFX12-SDAG: ; %bb.0: ; %.entry
+; GFX12-SDAG-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX12-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s1
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-SDAG-NEXT: s_nop 0
+; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: _amdgpu_cs_main:
+; GFX12-GISEL: ; %bb.0: ; %.entry
+; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9
+; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-GISEL-NEXT: s_nop 0
+; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-GISEL-NEXT: s_endpgm
.entry:
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
%idy = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -68,6 +94,24 @@ define amdgpu_cs void @caller() {
; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
; GFX9-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: caller:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX12-SDAG-NEXT: s_mov_b32 s1, callee at abs32@hi
+; GFX12-SDAG-NEXT: s_mov_b32 s0, callee at abs32@lo
+; GFX12-SDAG-NEXT: s_mov_b32 s32, 0
+; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: caller:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX12-GISEL-NEXT: s_mov_b32 s0, callee at abs32@lo
+; GFX12-GISEL-NEXT: s_mov_b32 s1, callee at abs32@hi
+; GFX12-GISEL-NEXT: s_mov_b32 s32, 0
+; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX12-GISEL-NEXT: s_endpgm
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
call amdgpu_gfx void @callee(i32 %idx)
ret void
@@ -79,3 +123,6 @@ declare i32 @llvm.amdgcn.workgroup.id.x()
declare i32 @llvm.amdgcn.workgroup.id.y()
declare i32 @llvm.amdgcn.workgroup.id.z()
declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX12: {{.*}}
+; GFX9: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
index c492b54759d82d7..769e6b0964abdb2 100644
--- a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
@@ -1,25 +1,47 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs --verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SDAG %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel --verify-machineinstrs < %s | FileCheck -check-prefix=GCN-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) {
-; GCN-SDAG-LABEL: workgroup_id_x:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
-; GCN-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GCN-SDAG-NEXT: s_endpgm
+; GFX9-SDAG-LABEL: workgroup_id_x:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-SDAG-NEXT: s_endpgm
;
-; GCN-GISEL-LABEL: workgroup_id_x:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GCN-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GCN-GISEL-NEXT: s_endpgm
+; GFX9-GISEL-LABEL: workgroup_id_x:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: workgroup_id_x:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX12-SDAG-NEXT: s_nop 0
+; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: workgroup_id_x:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX12-GISEL-NEXT: s_nop 0
+; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-GISEL-NEXT: s_endpgm
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
store i32 %idx, ptr addrspace(1) %ptrx
@@ -27,27 +49,29 @@ define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) {
}
define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace(1) %ptry) {
-; GCN-SDAG-LABEL: workgroup_id_xy:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
-; GCN-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp7
-; GCN-SDAG-NEXT: global_store_dword v0, v1, s[2:3]
-; GCN-SDAG-NEXT: s_endpgm
+; GFX9-LABEL: workgroup_id_xy:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: v_mov_b32_e32 v1, ttmp9
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: v_mov_b32_e32 v1, ttmp7
+; GFX9-NEXT: global_store_dword v0, v1, s[2:3]
+; GFX9-NEXT: s_endpgm
;
-; GCN-GISEL-LABEL: workgroup_id_xy:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, ttmp9
-; GCN-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, ttmp7
-; GCN-GISEL-NEXT: global_store_dword v0, v1, s[2:3]
-; GCN-GISEL-NEXT: s_endpgm
+; GFX12-LABEL: workgroup_id_xy:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
+; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9
+; GFX12-NEXT: v_mov_b32_e32 v2, ttmp7
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX12-NEXT: global_store_b32 v0, v2, s[2:3]
+; GFX12-NEXT: s_nop 0
+; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT: s_endpgm
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
store i32 %idx, ptr addrspace(1) %ptrx
%idy = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -57,37 +81,56 @@ define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace
}
define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspace(1) %ptry, ptr addrspace(1) %ptrz) {
-; GCN-SDAG-LABEL: workgroup_id_xyz:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
-; GCN-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
-; GCN-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
-; GCN-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GCN-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, s0
-; GCN-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16
-; GCN-SDAG-NEXT: global_store_dword v0, v1, s[2:3]
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, s0
-; GCN-SDAG-NEXT: global_store_dword v0, v1, s[6:7]
-; GCN-SDAG-NEXT: s_endpgm
+; GFX9-SDAG-LABEL: workgroup_id_xyz:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX9-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s0
+; GFX9-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16
+; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[2:3]
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s0
+; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: workgroup_id_xyz:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX9-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX9-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16
+; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[6:7]
+; GFX9-GISEL-NEXT: s_endpgm
;
-; GCN-GISEL-LABEL: workgroup_id_xyz:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
-; GCN-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
-; GCN-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GCN-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GCN-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff
-; GCN-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GCN-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16
-; GCN-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
-; GCN-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GCN-GISEL-NEXT: global_store_dword v1, v0, s[6:7]
-; GCN-GISEL-NEXT: s_endpgm
+; GFX12-LABEL: workgroup_id_xyz:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
+; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x10
+; GFX12-NEXT: s_and_b32 s2, ttmp7, 0xffff
+; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9
+; GFX12-NEXT: s_lshr_b32 s3, ttmp7, 16
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_clause 0x2
+; GFX12-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX12-NEXT: global_store_b32 v0, v2, s[6:7]
+; GFX12-NEXT: global_store_b32 v0, v3, s[0:1]
+; GFX12-NEXT: s_nop 0
+; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT: s_endpgm
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
store i32 %idx, ptr addrspace(1) %ptrx
%idy = call i32 @llvm.amdgcn.workgroup.id.y()
More information about the llvm-commits
mailing list