[llvm] 4616368 - [AMDGPU] Allow WorkgroupID intrinsics in amdgpu_gfx functions (#89773)

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 24 01:35:44 PDT 2024


Author: Jay Foad
Date: 2024-04-24T09:35:40+01:00
New Revision: 46163688e111956a9aeb3a35b8f85a40ef7035d0

URL: https://github.com/llvm/llvm-project/commit/46163688e111956a9aeb3a35b8f85a40ef7035d0
DIFF: https://github.com/llvm/llvm-project/commit/46163688e111956a9aeb3a35b8f85a40ef7035d0.diff

LOG: [AMDGPU] Allow WorkgroupID intrinsics in amdgpu_gfx functions (#89773)

With GFX12 architected SGPRs the workgroup ids are trivially available
in any function called from a compute entrypoint.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
    llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 780dfaae11ef3e..de029a4d6bedd2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4248,7 +4248,8 @@ bool AMDGPULegalizerInfo::loadInputValue(
       AMDGPU::isEntryFunctionCC(CC) && !MFI->hasWorkGroupIDZ() ? ~0u : 0xFFFFu);
   const ArgDescriptor WorkGroupIDZ =
       ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u);
-  if (ST.hasArchitectedSGPRs() && AMDGPU::isCompute(CC)) {
+  if (ST.hasArchitectedSGPRs() &&
+      (AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx)) {
     switch (ArgType) {
     case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
       Arg = &WorkGroupIDX;

diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 17b6e0cb9c3b45..50159d435aef7e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2124,7 +2124,8 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
       AMDGPU::isEntryFunctionCC(CC) && !MFI.hasWorkGroupIDZ() ? ~0u : 0xFFFFu);
   const ArgDescriptor WorkGroupIDZ =
       ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u);
-  if (Subtarget->hasArchitectedSGPRs() && AMDGPU::isCompute(CC)) {
+  if (Subtarget->hasArchitectedSGPRs() &&
+      (AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx)) {
     switch (PVID) {
     case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
       Reg = &WorkGroupIDX;
@@ -2798,7 +2799,9 @@ SDValue SITargetLowering::LowerFormalArguments(
     (void)UserSGPRInfo;
     if (!Subtarget->enableFlatScratch())
       assert(!UserSGPRInfo.hasFlatScratchInit());
-    if (CallConv != CallingConv::AMDGPU_CS || !Subtarget->hasArchitectedSGPRs())
+    if ((CallConv != CallingConv::AMDGPU_CS &&
+         CallConv != CallingConv::AMDGPU_Gfx) ||
+        !Subtarget->hasArchitectedSGPRs())
       assert(!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
              !Info->hasWorkGroupIDZ());
   }

diff  --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 12433dc83c4892..bf4a501cc3159d 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -110,7 +110,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
   }
 
   if (!AMDGPU::isGraphics(CC) ||
-      (CC == CallingConv::AMDGPU_CS && ST.hasArchitectedSGPRs())) {
+      ((CC == CallingConv::AMDGPU_CS || CC == CallingConv::AMDGPU_CS) &&
+       ST.hasArchitectedSGPRs())) {
     if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
       WorkGroupIDX = true;
 

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
index cfff0a969da9e7..14fe4e5f48c67c 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
-; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
-; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-SDAG %s
-; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-GISEL %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
 
@@ -156,10 +156,37 @@ define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace(
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9ARCH-LABEL: workgroup_ids_gfx:
-; GFX9ARCH:       ; %bb.0:
-; GFX9ARCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9ARCH-NEXT:    s_setpc_b64 s[30:31]
+; GFX9ARCH-SDAG-LABEL: workgroup_ids_gfx:
+; GFX9ARCH-SDAG:       ; %bb.0:
+; GFX9ARCH-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9ARCH-SDAG-NEXT:    v_mov_b32_e32 v6, ttmp9
+; GFX9ARCH-SDAG-NEXT:    s_and_b32 s34, ttmp7, 0xffff
+; GFX9ARCH-SDAG-NEXT:    global_store_dword v[0:1], v6, off
+; GFX9ARCH-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9ARCH-SDAG-NEXT:    v_mov_b32_e32 v0, s34
+; GFX9ARCH-SDAG-NEXT:    s_lshr_b32 s34, ttmp7, 16
+; GFX9ARCH-SDAG-NEXT:    global_store_dword v[2:3], v0, off
+; GFX9ARCH-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9ARCH-SDAG-NEXT:    v_mov_b32_e32 v0, s34
+; GFX9ARCH-SDAG-NEXT:    global_store_dword v[4:5], v0, off
+; GFX9ARCH-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9ARCH-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9ARCH-GISEL-LABEL: workgroup_ids_gfx:
+; GFX9ARCH-GISEL:       ; %bb.0:
+; GFX9ARCH-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9ARCH-GISEL-NEXT:    v_mov_b32_e32 v6, ttmp9
+; GFX9ARCH-GISEL-NEXT:    s_and_b32 s34, ttmp7, 0xffff
+; GFX9ARCH-GISEL-NEXT:    s_lshr_b32 s35, ttmp7, 16
+; GFX9ARCH-GISEL-NEXT:    global_store_dword v[0:1], v6, off
+; GFX9ARCH-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9ARCH-GISEL-NEXT:    v_mov_b32_e32 v0, s34
+; GFX9ARCH-GISEL-NEXT:    global_store_dword v[2:3], v0, off
+; GFX9ARCH-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9ARCH-GISEL-NEXT:    v_mov_b32_e32 v0, s35
+; GFX9ARCH-GISEL-NEXT:    global_store_dword v[4:5], v0, off
+; GFX9ARCH-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9ARCH-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: workgroup_ids_gfx:
 ; GFX12:       ; %bb.0:
@@ -168,6 +195,18 @@ define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace(
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_and_b32 s0, ttmp7, 0xffff
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX12-NEXT:    v_dual_mov_b32 v6, ttmp9 :: v_dual_mov_b32 v7, s0
+; GFX12-NEXT:    s_lshr_b32 s1, ttmp7, 16
+; GFX12-NEXT:    v_mov_b32_e32 v8, s1
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    global_store_b32 v[0:1], v6, off scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    global_store_b32 v[2:3], v7, off scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    global_store_b32 v[4:5], v8, off scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_storecnt 0x0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %id.x = call i32 @llvm.amdgcn.workgroup.id.x()
   %id.y = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -177,11 +216,3 @@ define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace(
   store volatile i32 %id.z, ptr addrspace(1) %outz
   ret void
 }
-
-declare i32 @llvm.amdgcn.workgroup.id.x()
-declare i32 @llvm.amdgcn.workgroup.id.y()
-declare i32 @llvm.amdgcn.workgroup.id.z()
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX9-GISEL: {{.*}}
-; GFX9-SDAG: {{.*}}


        


More information about the llvm-commits mailing list