[llvm] 4616368 - [AMDGPU] Allow WorkgroupID intrinsics in amdgpu_gfx functions (#89773)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 24 01:35:44 PDT 2024
Author: Jay Foad
Date: 2024-04-24T09:35:40+01:00
New Revision: 46163688e111956a9aeb3a35b8f85a40ef7035d0
URL: https://github.com/llvm/llvm-project/commit/46163688e111956a9aeb3a35b8f85a40ef7035d0
DIFF: https://github.com/llvm/llvm-project/commit/46163688e111956a9aeb3a35b8f85a40ef7035d0.diff
LOG: [AMDGPU] Allow WorkgroupID intrinsics in amdgpu_gfx functions (#89773)
With GFX12 architected SGPRs the workgroup ids are trivially available
in any function called from a compute entrypoint.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 780dfaae11ef3e..de029a4d6bedd2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4248,7 +4248,8 @@ bool AMDGPULegalizerInfo::loadInputValue(
AMDGPU::isEntryFunctionCC(CC) && !MFI->hasWorkGroupIDZ() ? ~0u : 0xFFFFu);
const ArgDescriptor WorkGroupIDZ =
ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u);
- if (ST.hasArchitectedSGPRs() && AMDGPU::isCompute(CC)) {
+ if (ST.hasArchitectedSGPRs() &&
+ (AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx)) {
switch (ArgType) {
case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
Arg = &WorkGroupIDX;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 17b6e0cb9c3b45..50159d435aef7e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2124,7 +2124,8 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
AMDGPU::isEntryFunctionCC(CC) && !MFI.hasWorkGroupIDZ() ? ~0u : 0xFFFFu);
const ArgDescriptor WorkGroupIDZ =
ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u);
- if (Subtarget->hasArchitectedSGPRs() && AMDGPU::isCompute(CC)) {
+ if (Subtarget->hasArchitectedSGPRs() &&
+ (AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx)) {
switch (PVID) {
case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
Reg = &WorkGroupIDX;
@@ -2798,7 +2799,9 @@ SDValue SITargetLowering::LowerFormalArguments(
(void)UserSGPRInfo;
if (!Subtarget->enableFlatScratch())
assert(!UserSGPRInfo.hasFlatScratchInit());
- if (CallConv != CallingConv::AMDGPU_CS || !Subtarget->hasArchitectedSGPRs())
+ if ((CallConv != CallingConv::AMDGPU_CS &&
+ CallConv != CallingConv::AMDGPU_Gfx) ||
+ !Subtarget->hasArchitectedSGPRs())
assert(!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
!Info->hasWorkGroupIDZ());
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 12433dc83c4892..bf4a501cc3159d 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -110,7 +110,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
}
if (!AMDGPU::isGraphics(CC) ||
- (CC == CallingConv::AMDGPU_CS && ST.hasArchitectedSGPRs())) {
+ ((CC == CallingConv::AMDGPU_CS || CC == CallingConv::AMDGPU_CS) &&
+ ST.hasArchitectedSGPRs())) {
if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
WorkGroupIDX = true;
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
index cfff0a969da9e7..14fe4e5f48c67c 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
-; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
-; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-SDAG %s
-; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-GISEL %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
@@ -156,10 +156,37 @@ define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace(
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9ARCH-LABEL: workgroup_ids_gfx:
-; GFX9ARCH: ; %bb.0:
-; GFX9ARCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9ARCH-NEXT: s_setpc_b64 s[30:31]
+; GFX9ARCH-SDAG-LABEL: workgroup_ids_gfx:
+; GFX9ARCH-SDAG: ; %bb.0:
+; GFX9ARCH-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v6, ttmp9
+; GFX9ARCH-SDAG-NEXT: s_and_b32 s34, ttmp7, 0xffff
+; GFX9ARCH-SDAG-NEXT: global_store_dword v[0:1], v6, off
+; GFX9ARCH-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, s34
+; GFX9ARCH-SDAG-NEXT: s_lshr_b32 s34, ttmp7, 16
+; GFX9ARCH-SDAG-NEXT: global_store_dword v[2:3], v0, off
+; GFX9ARCH-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, s34
+; GFX9ARCH-SDAG-NEXT: global_store_dword v[4:5], v0, off
+; GFX9ARCH-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9ARCH-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9ARCH-GISEL-LABEL: workgroup_ids_gfx:
+; GFX9ARCH-GISEL: ; %bb.0:
+; GFX9ARCH-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v6, ttmp9
+; GFX9ARCH-GISEL-NEXT: s_and_b32 s34, ttmp7, 0xffff
+; GFX9ARCH-GISEL-NEXT: s_lshr_b32 s35, ttmp7, 16
+; GFX9ARCH-GISEL-NEXT: global_store_dword v[0:1], v6, off
+; GFX9ARCH-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, s34
+; GFX9ARCH-GISEL-NEXT: global_store_dword v[2:3], v0, off
+; GFX9ARCH-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, s35
+; GFX9ARCH-GISEL-NEXT: global_store_dword v[4:5], v0, off
+; GFX9ARCH-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9ARCH-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: workgroup_ids_gfx:
; GFX12: ; %bb.0:
@@ -168,6 +195,18 @@ define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace(
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX12-NEXT: v_dual_mov_b32 v6, ttmp9 :: v_dual_mov_b32 v7, s0
+; GFX12-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GFX12-NEXT: v_mov_b32_e32 v8, s1
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_store_b32 v[0:1], v6, off scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_store_b32 v[2:3], v7, off scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_store_b32 v[4:5], v8, off scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%id.x = call i32 @llvm.amdgcn.workgroup.id.x()
%id.y = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -177,11 +216,3 @@ define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace(
store volatile i32 %id.z, ptr addrspace(1) %outz
ret void
}
-
-declare i32 @llvm.amdgcn.workgroup.id.x()
-declare i32 @llvm.amdgcn.workgroup.id.y()
-declare i32 @llvm.amdgcn.workgroup.id.z()
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX9-GISEL: {{.*}}
-; GFX9-SDAG: {{.*}}
More information about the llvm-commits
mailing list