[llvm] 7e9b49f - AMDGPU: Add plumbing for private segment size argument (#96445)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 25 07:20:55 PDT 2024


Author: Nicolai Hähnle
Date: 2024-06-25T16:20:51+02:00
New Revision: 7e9b49f6b86c8616e6211ec02dbccc3ebb615e79

URL: https://github.com/llvm/llvm-project/commit/7e9b49f6b86c8616e6211ec02dbccc3ebb615e79
DIFF: https://github.com/llvm/llvm-project/commit/7e9b49f6b86c8616e6211ec02dbccc3ebb615e79.diff

LOG: AMDGPU: Add plumbing for private segment size argument (#96445)

The actual size of scratch/private is determined at dispatch time, so
add more plumbing to request it. Will be used in subsequent change.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
    llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
    llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
    llvm/lib/Target/AMDGPU/GCNSubtarget.h
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
    llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index de25f9241a503..f57fc168c1dfc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -115,6 +115,9 @@ AMDGPUFunctionArgInfo::getPreloadedValue(
     return std::tuple(
         PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
         &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
+  case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_SIZE:
+    return {PrivateSegmentSize ? &PrivateSegmentSize : nullptr,
+            &AMDGPU::SGPR_32RegClass, LLT::scalar(32)};
   case AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR:
     return std::tuple(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
                       &AMDGPU::SGPR_64RegClass,

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index 42b33c50d9f8c..2e02bb4271adc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -114,11 +114,12 @@ struct AMDGPUFunctionArgInfo {
     PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
     IMPLICIT_BUFFER_PTR = 15,
     IMPLICIT_ARG_PTR = 16,
+    PRIVATE_SEGMENT_SIZE = 17,
 
     // VGPRS:
-    WORKITEM_ID_X       = 17,
-    WORKITEM_ID_Y       = 18,
-    WORKITEM_ID_Z       = 19,
+    WORKITEM_ID_X       = 18,
+    WORKITEM_ID_Y       = 19,
+    WORKITEM_ID_Z       = 20,
     FIRST_VGPR_VALUE    = WORKITEM_ID_X
   };
   // clang-format on

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index d8e22f4b0d8fa..bb85b03ebb5e9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -461,6 +461,10 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
     KernelCodeProperties |=
         amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
   }
+  if (UserSGPRInfo.hasPrivateSegmentSize()) {
+    KernelCodeProperties |=
+        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
+  }
   if (MF.getSubtarget<GCNSubtarget>().isWave32()) {
     KernelCodeProperties |=
         amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
@@ -1397,6 +1401,9 @@ void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
   if (UserSGPRInfo.hasFlatScratchInit())
     Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
 
+  if (UserSGPRInfo.hasPrivateSegmentSize())
+    Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
+
   if (UserSGPRInfo.hasDispatchPtr())
     Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
 

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 0751c8dc8b8bf..a8e26f104f588 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -1104,6 +1104,9 @@ GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo(const Function &F,
 
   if (hasFlatScratchInit())
     NumUsedUserSGPRs += getNumUserSGPRForField(FlatScratchInitID);
+
+  if (hasPrivateSegmentSize())
+    NumUsedUserSGPRs += getNumUserSGPRForField(PrivateSegmentSizeID);
 }
 
 void GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs(unsigned NumSGPRs) {

diff  --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 74c8f85f1b031..bb0746c5b5365 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1567,6 +1567,8 @@ class GCNUserSGPRUsageInfo {
 
   bool hasFlatScratchInit() const { return FlatScratchInit; }
 
+  bool hasPrivateSegmentSize() const { return PrivateSegmentSize; }
+
   unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
 
   unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
@@ -1631,6 +1633,8 @@ class GCNUserSGPRUsageInfo {
 
   bool FlatScratchInit = false;
 
+  bool PrivateSegmentSize = false;
+
   unsigned NumKernargPreloadSGPRs = 0;
 
   unsigned NumUsedUserSGPRs = 0;

diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ac4cbd617b81d..d0b097d2b8feb 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2468,6 +2468,12 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
     CCInfo.AllocateReg(FlatScratchInitReg);
   }
 
+  if (UserSGPRInfo.hasPrivateSegmentSize()) {
+    Register PrivateSegmentSizeReg = Info.addPrivateSegmentSize(TRI);
+    MF.addLiveIn(PrivateSegmentSizeReg, &AMDGPU::SGPR_32RegClass);
+    CCInfo.AllocateReg(PrivateSegmentSizeReg);
+  }
+
   // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
   // these from the dispatch pointer.
 }

diff  --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 74f6fd64b473e..d9db0f7a4f531 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -232,6 +232,12 @@ Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
   return ArgInfo.FlatScratchInit.getRegister();
 }
 
+Register SIMachineFunctionInfo::addPrivateSegmentSize(const SIRegisterInfo &TRI) {
+  ArgInfo.PrivateSegmentSize = ArgDescriptor::createRegister(getNextUserSGPR());
+  NumUserSGPRs += 1;
+  return ArgInfo.PrivateSegmentSize.getRegister();
+}
+
 Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));

diff  --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 9fe02e24c8a15..7af5e7388f841 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -752,6 +752,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
   Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
   Register addDispatchID(const SIRegisterInfo &TRI);
   Register addFlatScratchInit(const SIRegisterInfo &TRI);
+  Register addPrivateSegmentSize(const SIRegisterInfo &TRI);
   Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
   Register addLDSKernelId();
   SmallVectorImpl<MCRegister> *


        


More information about the llvm-commits mailing list