[llvm] 1c9e623 - [AMDGPU] Allow architected SGPRs for workgroup IDs

Christudasan Devadasan via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 13 08:44:01 PST 2023


Author: Christudasan Devadasan
Date: 2023-02-13T22:11:35+05:30
New Revision: 1c9e6238fe2fe27ee9cdb594e4f2bb33bb416e49

URL: https://github.com/llvm/llvm-project/commit/1c9e6238fe2fe27ee9cdb594e4f2bb33bb416e49
DIFF: https://github.com/llvm/llvm-project/commit/1c9e6238fe2fe27ee9cdb594e4f2bb33bb416e49.diff

LOG: [AMDGPU] Allow architected SGPRs for workgroup IDs

Some subtargets provide the workgroup IDs in architected
SGPRs instead of regular SGPRs. This patch adds support
for that, guarded by the subtarget feature
FeatureArchitectedSGPRs.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D143707

Added: 
    llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll

Modified: 
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 15925fd39e491..3e866213ec1d3 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1866,7 +1866,7 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
     return DAG.getUNDEF(VT);
   }
 
-  return CreateLiveInRegister(DAG, RC, Reg->getRegister(), VT);
+  return loadInputValue(DAG, RC, VT, SDLoc(DAG.getEntryNode()), *Reg);
 }
 
 static void processPSInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
@@ -2181,11 +2181,16 @@ void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo,
                                            SIMachineFunctionInfo &Info,
                                            CallingConv::ID CallConv,
                                            bool IsShader) const {
+  bool HasArchitectedSGPRs = Subtarget->hasArchitectedSGPRs();
   if (Subtarget->hasUserSGPRInit16Bug() && !IsShader) {
     // Note: user SGPRs are handled by the front-end for graphics shaders
     // Pad up the used user SGPRs with dead inputs.
-    unsigned CurrentUserSGPRs = Info.getNumUserSGPRs();
 
+    // TODO: NumRequiredSystemSGPRs computation should be adjusted appropriately
+    // before enabling architected SGPRs for workgroup IDs.
+    assert(!HasArchitectedSGPRs && "Unhandled feature for the subtarget");
+
+    unsigned CurrentUserSGPRs = Info.getNumUserSGPRs();
     // Note we do not count the PrivateSegmentWaveByteOffset. We do not want to
     // rely on it to reach 16 since if we end up having no stack usage, it will
     // not really be added.
@@ -2201,20 +2206,26 @@ void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo,
   }
 
   if (Info.hasWorkGroupIDX()) {
-    Register Reg = Info.addWorkGroupIDX();
-    MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
+    Register Reg = Info.addWorkGroupIDX(HasArchitectedSGPRs);
+    if (!HasArchitectedSGPRs)
+      MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
+
     CCInfo.AllocateReg(Reg);
   }
 
   if (Info.hasWorkGroupIDY()) {
-    Register Reg = Info.addWorkGroupIDY();
-    MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
+    Register Reg = Info.addWorkGroupIDY(HasArchitectedSGPRs);
+    if (!HasArchitectedSGPRs)
+      MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
+
     CCInfo.AllocateReg(Reg);
   }
 
   if (Info.hasWorkGroupIDZ()) {
-    Register Reg = Info.addWorkGroupIDZ();
-    MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
+    Register Reg = Info.addWorkGroupIDZ(HasArchitectedSGPRs);
+    if (!HasArchitectedSGPRs)
+      MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
+
     CCInfo.AllocateReg(Reg);
   }
 

diff  --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index c869ee8757117..20d2184b32128 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -693,21 +693,32 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
   }
 
   // Add system SGPRs.
-  Register addWorkGroupIDX() {
-    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
-    NumSystemSGPRs += 1;
+  Register addWorkGroupIDX(bool HasArchitectedSGPRs) {
+    Register Reg = HasArchitectedSGPRs ? AMDGPU::TTMP9 : getNextSystemSGPR();
+    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(Reg);
+    if (!HasArchitectedSGPRs)
+      NumSystemSGPRs += 1;
+
     return ArgInfo.WorkGroupIDX.getRegister();
   }
 
-  Register addWorkGroupIDY() {
-    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
-    NumSystemSGPRs += 1;
+  Register addWorkGroupIDY(bool HasArchitectedSGPRs) {
+    Register Reg = HasArchitectedSGPRs ? AMDGPU::TTMP7 : getNextSystemSGPR();
+    unsigned Mask = HasArchitectedSGPRs && hasWorkGroupIDZ() ? 0xffff : ~0u;
+    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(Reg, Mask);
+    if (!HasArchitectedSGPRs)
+      NumSystemSGPRs += 1;
+
     return ArgInfo.WorkGroupIDY.getRegister();
   }
 
-  Register addWorkGroupIDZ() {
-    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
-    NumSystemSGPRs += 1;
+  Register addWorkGroupIDZ(bool HasArchitectedSGPRs) {
+    Register Reg = HasArchitectedSGPRs ? AMDGPU::TTMP7 : getNextSystemSGPR();
+    unsigned Mask = HasArchitectedSGPRs ? 0xffff << 16 : ~0u;
+    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(Reg, Mask);
+    if (!HasArchitectedSGPRs)
+      NumSystemSGPRs += 1;
+
     return ArgInfo.WorkGroupIDZ.getRegister();
   }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
new file mode 100644
index 0000000000000..c492b54759d82
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs --verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel --verify-machineinstrs < %s | FileCheck -check-prefix=GCN-GISEL %s
+
+define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) {
+; GCN-SDAG-LABEL: workgroup_id_x:
+; GCN-SDAG:       ; %bb.0:
+; GCN-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v1, ttmp9
+; GCN-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-SDAG-NEXT:    global_store_dword v0, v1, s[0:1]
+; GCN-SDAG-NEXT:    s_endpgm
+;
+; GCN-GISEL-LABEL: workgroup_id_x:
+; GCN-GISEL:       ; %bb.0:
+; GCN-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v0, ttmp9
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GCN-GISEL-NEXT:    s_endpgm
+  %idx = call i32 @llvm.amdgcn.workgroup.id.x()
+  store i32 %idx, ptr addrspace(1) %ptrx
+
+  ret void
+}
+
+define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace(1) %ptry) {
+; GCN-SDAG-LABEL: workgroup_id_xy:
+; GCN-SDAG:       ; %bb.0:
+; GCN-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v1, ttmp9
+; GCN-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-SDAG-NEXT:    global_store_dword v0, v1, s[0:1]
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v1, ttmp7
+; GCN-SDAG-NEXT:    global_store_dword v0, v1, s[2:3]
+; GCN-SDAG-NEXT:    s_endpgm
+;
+; GCN-GISEL-LABEL: workgroup_id_xy:
+; GCN-GISEL:       ; %bb.0:
+; GCN-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v1, ttmp9
+; GCN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-GISEL-NEXT:    global_store_dword v0, v1, s[0:1]
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v1, ttmp7
+; GCN-GISEL-NEXT:    global_store_dword v0, v1, s[2:3]
+; GCN-GISEL-NEXT:    s_endpgm
+  %idx = call i32 @llvm.amdgcn.workgroup.id.x()
+  store i32 %idx, ptr addrspace(1) %ptrx
+  %idy = call i32 @llvm.amdgcn.workgroup.id.y()
+  store i32 %idy, ptr addrspace(1) %ptry
+
+  ret void
+}
+
+define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspace(1) %ptry, ptr addrspace(1) %ptrz) {
+; GCN-SDAG-LABEL: workgroup_id_xyz:
+; GCN-SDAG:       ; %bb.0:
+; GCN-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GCN-SDAG-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v1, ttmp9
+; GCN-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-SDAG-NEXT:    global_store_dword v0, v1, s[0:1]
+; GCN-SDAG-NEXT:    s_and_b32 s0, ttmp7, 0xffff
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v1, s0
+; GCN-SDAG-NEXT:    s_lshr_b32 s0, ttmp7, 16
+; GCN-SDAG-NEXT:    global_store_dword v0, v1, s[2:3]
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v1, s0
+; GCN-SDAG-NEXT:    global_store_dword v0, v1, s[6:7]
+; GCN-SDAG-NEXT:    s_endpgm
+;
+; GCN-GISEL-LABEL: workgroup_id_xyz:
+; GCN-GISEL:       ; %bb.0:
+; GCN-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GCN-GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v0, ttmp9
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GCN-GISEL-NEXT:    s_and_b32 s0, ttmp7, 0xffff
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GCN-GISEL-NEXT:    s_lshr_b32 s0, ttmp7, 16
+; GCN-GISEL-NEXT:    global_store_dword v1, v0, s[2:3]
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GCN-GISEL-NEXT:    global_store_dword v1, v0, s[6:7]
+; GCN-GISEL-NEXT:    s_endpgm
+  %idx = call i32 @llvm.amdgcn.workgroup.id.x()
+  store i32 %idx, ptr addrspace(1) %ptrx
+  %idy = call i32 @llvm.amdgcn.workgroup.id.y()
+  store i32 %idy, ptr addrspace(1) %ptry
+  %idz = call i32 @llvm.amdgcn.workgroup.id.z()
+  store i32 %idz, ptr addrspace(1) %ptrz
+
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workgroup.id.x()
+declare i32 @llvm.amdgcn.workgroup.id.y()
+declare i32 @llvm.amdgcn.workgroup.id.z()


        


More information about the llvm-commits mailing list