[llvm] [AMDGPU] Enable architected SGPRs for GFX12 (PR #79160)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 23 08:10:31 PST 2024
https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/79160
- Precommit tests
- [AMDGPU] Enable architected SGPRs for GFX12
>From f8f103029e87460d02e03072f94f4d8272c8e6d2 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Tue, 23 Jan 2024 15:57:22 +0000
Subject: [PATCH 1/2] Precommit tests
---
.../AMDGPU/lower-work-group-id-intrinsics.ll | 31 ++-
.../AMDGPU/workgroup-id-in-arch-sgprs.ll | 176 +++++++++++-------
2 files changed, 137 insertions(+), 70 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
index c732ff70942550..b94758d1c4ddf5 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-SDAG %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
define amdgpu_cs void @_amdgpu_cs_main() {
; GFX9-SDAG-LABEL: _amdgpu_cs_main:
@@ -23,6 +25,13 @@ define amdgpu_cs void @_amdgpu_cs_main() {
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2
; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX12-LABEL: _amdgpu_cs_main:
+; GFX12: ; %bb.0: ; %.entry
+; GFX12-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-NEXT: s_nop 0
+; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT: s_endpgm
.entry:
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
%idy = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -68,6 +77,22 @@ define amdgpu_cs void @caller() {
; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
; GFX9-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: caller:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_mov_b32 s1, callee@abs32@hi
+; GFX12-SDAG-NEXT: s_mov_b32 s0, callee@abs32@lo
+; GFX12-SDAG-NEXT: s_mov_b32 s32, 0
+; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: caller:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_mov_b32 s0, callee@abs32@lo
+; GFX12-GISEL-NEXT: s_mov_b32 s1, callee@abs32@hi
+; GFX12-GISEL-NEXT: s_mov_b32 s32, 0
+; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX12-GISEL-NEXT: s_endpgm
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
call amdgpu_gfx void @callee(i32 %idx)
ret void
@@ -79,3 +104,5 @@ declare i32 @llvm.amdgcn.workgroup.id.x()
declare i32 @llvm.amdgcn.workgroup.id.y()
declare i32 @llvm.amdgcn.workgroup.id.z()
declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX9: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
index c492b54759d82d..74a587a3b09abd 100644
--- a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
@@ -1,25 +1,47 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs --verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SDAG %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel --verify-machineinstrs < %s | FileCheck -check-prefix=GCN-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) {
-; GCN-SDAG-LABEL: workgroup_id_x:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
-; GCN-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GCN-SDAG-NEXT: s_endpgm
+; GFX9-SDAG-LABEL: workgroup_id_x:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-SDAG-NEXT: s_endpgm
;
-; GCN-GISEL-LABEL: workgroup_id_x:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GCN-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GCN-GISEL-NEXT: s_endpgm
+; GFX9-GISEL-LABEL: workgroup_id_x:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: workgroup_id_x:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX12-SDAG-NEXT: s_nop 0
+; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: workgroup_id_x:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX12-GISEL-NEXT: s_nop 0
+; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-GISEL-NEXT: s_endpgm
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
store i32 %idx, ptr addrspace(1) %ptrx
@@ -27,27 +49,29 @@ define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) {
}
define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace(1) %ptry) {
-; GCN-SDAG-LABEL: workgroup_id_xy:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
-; GCN-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp7
-; GCN-SDAG-NEXT: global_store_dword v0, v1, s[2:3]
-; GCN-SDAG-NEXT: s_endpgm
+; GFX9-LABEL: workgroup_id_xy:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: v_mov_b32_e32 v1, ttmp9
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: v_mov_b32_e32 v1, ttmp7
+; GFX9-NEXT: global_store_dword v0, v1, s[2:3]
+; GFX9-NEXT: s_endpgm
;
-; GCN-GISEL-LABEL: workgroup_id_xy:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, ttmp9
-; GCN-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, ttmp7
-; GCN-GISEL-NEXT: global_store_dword v0, v1, s[2:3]
-; GCN-GISEL-NEXT: s_endpgm
+; GFX12-LABEL: workgroup_id_xy:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
+; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX12-NEXT: v_mov_b32_e32 v2, s3
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX12-NEXT: global_store_b32 v0, v2, s[6:7]
+; GFX12-NEXT: s_nop 0
+; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT: s_endpgm
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
store i32 %idx, ptr addrspace(1) %ptrx
%idy = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -57,37 +81,53 @@ define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace
}
define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspace(1) %ptry, ptr addrspace(1) %ptrz) {
-; GCN-SDAG-LABEL: workgroup_id_xyz:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
-; GCN-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
-; GCN-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
-; GCN-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GCN-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, s0
-; GCN-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16
-; GCN-SDAG-NEXT: global_store_dword v0, v1, s[2:3]
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, s0
-; GCN-SDAG-NEXT: global_store_dword v0, v1, s[6:7]
-; GCN-SDAG-NEXT: s_endpgm
+; GFX9-SDAG-LABEL: workgroup_id_xyz:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX9-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s0
+; GFX9-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16
+; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[2:3]
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s0
+; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: workgroup_id_xyz:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX9-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX9-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16
+; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[6:7]
+; GFX9-GISEL-NEXT: s_endpgm
;
-; GCN-GISEL-LABEL: workgroup_id_xyz:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
-; GCN-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
-; GCN-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GCN-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GCN-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff
-; GCN-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GCN-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16
-; GCN-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
-; GCN-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GCN-GISEL-NEXT: global_store_dword v1, v0, s[6:7]
-; GCN-GISEL-NEXT: s_endpgm
+; GFX12-LABEL: workgroup_id_xyz:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: s_load_b128 s[8:11], s[0:1], 0x0
+; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x10
+; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX12-NEXT: v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v3, s4
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_clause 0x2
+; GFX12-NEXT: global_store_b32 v0, v1, s[8:9]
+; GFX12-NEXT: global_store_b32 v0, v2, s[10:11]
+; GFX12-NEXT: global_store_b32 v0, v3, s[0:1]
+; GFX12-NEXT: s_nop 0
+; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT: s_endpgm
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
store i32 %idx, ptr addrspace(1) %ptrx
%idy = call i32 @llvm.amdgcn.workgroup.id.y()
>From 1935d52a6eea036e44d4abd0ea79f0b923b45df1 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Tue, 23 Jan 2024 16:07:41 +0000
Subject: [PATCH 2/2] [AMDGPU] Enable architected SGPRs for GFX12
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 1 +
.../AMDGPU/indirect-call-known-callees.ll | 30 ++++++++---------
.../AMDGPU/lower-work-group-id-intrinsics.ll | 32 +++++++++++++++----
.../AMDGPU/workgroup-id-in-arch-sgprs.ll | 27 +++++++++-------
4 files changed, 57 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 92985f971f17a7..cb29d5d9475981 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1496,6 +1496,7 @@ def FeatureISAVersion12 : FeatureSet<
FeatureWavefrontSize32,
FeatureShaderCyclesHiLoRegisters,
FeatureArchitectedFlatScratch,
+ FeatureArchitectedSGPRs,
FeatureAtomicFaddRtnInsts,
FeatureAtomicFaddNoRtnInsts,
FeatureAtomicDsPkAdd16Insts,
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
index 9965d214cc9b3f..380a13ed16128f 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
@@ -41,30 +41,30 @@ define amdgpu_kernel void @indirect_call_known_no_special_inputs() {
;
; GFX12-LABEL: indirect_call_known_no_special_inputs:
; GFX12: ; %bb.0: ; %bb
+; GFX12-NEXT: s_getpc_b64 s[2:3]
+; GFX12-NEXT: s_sext_i32_i16 s3, s3
+; GFX12-NEXT: s_add_co_u32 s2, s2, snork@gotpcrel32@lo+8
+; GFX12-NEXT: s_add_co_ci_u32 s3, s3, snork@gotpcrel32@hi+16
+; GFX12-NEXT: s_mov_b64 s[0:1], 0
; GFX12-NEXT: s_getpc_b64 s[4:5]
; GFX12-NEXT: s_sext_i32_i16 s5, s5
-; GFX12-NEXT: s_add_co_u32 s4, s4, snork@gotpcrel32@lo+8
-; GFX12-NEXT: s_add_co_ci_u32 s5, s5, snork@gotpcrel32@hi+16
-; GFX12-NEXT: s_mov_b64 s[2:3], 0
-; GFX12-NEXT: s_getpc_b64 s[6:7]
-; GFX12-NEXT: s_sext_i32_i16 s7, s7
-; GFX12-NEXT: s_add_co_u32 s6, s6, wobble@gotpcrel32@lo+8
-; GFX12-NEXT: s_add_co_ci_u32 s7, s7, wobble@gotpcrel32@hi+16
-; GFX12-NEXT: s_load_u8 s1, s[2:3], 0x0
+; GFX12-NEXT: s_add_co_u32 s4, s4, wobble@gotpcrel32@lo+8
+; GFX12-NEXT: s_add_co_ci_u32 s5, s5, wobble@gotpcrel32@hi+16
+; GFX12-NEXT: s_load_u8 s6, s[0:1], 0x0
+; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX12-NEXT: s_load_b64 s[4:5], s[6:7], 0x0
; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 0
; GFX12-NEXT: v_mov_b32_e32 v31, v0
+; GFX12-NEXT: s_mov_b32 s12, ttmp9
; GFX12-NEXT: s_mov_b64 s[8:9], 0
-; GFX12-NEXT: s_mov_b32 s12, s0
; GFX12-NEXT: s_mov_b32 s32, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_and_b32 s1, 1, s1
+; GFX12-NEXT: s_and_b32 s4, 1, s6
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT: s_cmp_eq_u32 s1, 1
-; GFX12-NEXT: s_cselect_b32 s3, s5, s3
-; GFX12-NEXT: s_cselect_b32 s2, s4, s2
-; GFX12-NEXT: s_swappc_b64 s[30:31], s[2:3]
+; GFX12-NEXT: s_cmp_eq_u32 s4, 1
+; GFX12-NEXT: s_cselect_b32 s1, s3, s1
+; GFX12-NEXT: s_cselect_b32 s0, s2, s0
+; GFX12-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX12-NEXT: s_endpgm
bb:
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
index b94758d1c4ddf5..495b54758de049 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
@@ -26,12 +26,29 @@ define amdgpu_cs void @_amdgpu_cs_main() {
; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
; GFX9-GISEL-NEXT: s_endpgm
;
-; GFX12-LABEL: _amdgpu_cs_main:
-; GFX12: ; %bb.0: ; %.entry
-; GFX12-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
-; GFX12-NEXT: s_nop 0
-; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX12-NEXT: s_endpgm
+; GFX12-SDAG-LABEL: _amdgpu_cs_main:
+; GFX12-SDAG: ; %bb.0: ; %.entry
+; GFX12-SDAG-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX12-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s1
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-SDAG-NEXT: s_nop 0
+; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: _amdgpu_cs_main:
+; GFX12-GISEL: ; %bb.0: ; %.entry
+; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9
+; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-GISEL-NEXT: s_nop 0
+; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-GISEL-NEXT: s_endpgm
.entry:
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
%idy = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -80,6 +97,7 @@ define amdgpu_cs void @caller() {
;
; GFX12-SDAG-LABEL: caller:
; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
; GFX12-SDAG-NEXT: s_mov_b32 s1, callee@abs32@hi
; GFX12-SDAG-NEXT: s_mov_b32 s0, callee@abs32@lo
; GFX12-SDAG-NEXT: s_mov_b32 s32, 0
@@ -88,6 +106,7 @@ define amdgpu_cs void @caller() {
;
; GFX12-GISEL-LABEL: caller:
; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
; GFX12-GISEL-NEXT: s_mov_b32 s0, callee@abs32@lo
; GFX12-GISEL-NEXT: s_mov_b32 s1, callee@abs32@hi
; GFX12-GISEL-NEXT: s_mov_b32 s32, 0
@@ -105,4 +124,5 @@ declare i32 @llvm.amdgcn.workgroup.id.y()
declare i32 @llvm.amdgcn.workgroup.id.z()
declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX12: {{.*}}
; GFX9: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
index 74a587a3b09abd..769e6b0964abdb 100644
--- a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
@@ -26,7 +26,7 @@ define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) {
; GFX12-SDAG-LABEL: workgroup_id_x:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-SDAG-NEXT: s_nop 0
@@ -36,7 +36,7 @@ define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) {
; GFX12-GISEL-LABEL: workgroup_id_x:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX12-GISEL-NEXT: s_nop 0
@@ -62,13 +62,13 @@ define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace
;
; GFX12-LABEL: workgroup_id_xy:
; GFX12: ; %bb.0:
-; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
-; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
-; GFX12-NEXT: v_mov_b32_e32 v2, s3
+; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
+; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9
+; GFX12-NEXT: v_mov_b32_e32 v2, ttmp7
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_clause 0x1
-; GFX12-NEXT: global_store_b32 v0, v1, s[4:5]
-; GFX12-NEXT: global_store_b32 v0, v2, s[6:7]
+; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX12-NEXT: global_store_b32 v0, v2, s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
@@ -116,14 +116,17 @@ define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspac
; GFX12-LABEL: workgroup_id_xyz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_clause 0x1
-; GFX12-NEXT: s_load_b128 s[8:11], s[0:1], 0x0
+; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x10
-; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
-; GFX12-NEXT: v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v3, s4
+; GFX12-NEXT: s_and_b32 s2, ttmp7, 0xffff
+; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9
+; GFX12-NEXT: s_lshr_b32 s3, ttmp7, 16
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_clause 0x2
-; GFX12-NEXT: global_store_b32 v0, v1, s[8:9]
-; GFX12-NEXT: global_store_b32 v0, v2, s[10:11]
+; GFX12-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX12-NEXT: global_store_b32 v0, v2, s[6:7]
; GFX12-NEXT: global_store_b32 v0, v3, s[0:1]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
More information about the llvm-commits mailing list