[llvm] 7e9b49f - AMDGPU: Add plumbing for private segment size argument (#96445)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 25 07:20:55 PDT 2024
Author: Nicolai Hähnle
Date: 2024-06-25T16:20:51+02:00
New Revision: 7e9b49f6b86c8616e6211ec02dbccc3ebb615e79
URL: https://github.com/llvm/llvm-project/commit/7e9b49f6b86c8616e6211ec02dbccc3ebb615e79
DIFF: https://github.com/llvm/llvm-project/commit/7e9b49f6b86c8616e6211ec02dbccc3ebb615e79.diff
LOG: AMDGPU: Add plumbing for private segment size argument (#96445)
The actual size of the scratch/private segment is determined at dispatch
time, so add more plumbing to request it. This will be used in a subsequent change.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/lib/Target/AMDGPU/GCNSubtarget.h
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index de25f9241a503..f57fc168c1dfc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -115,6 +115,9 @@ AMDGPUFunctionArgInfo::getPreloadedValue(
return std::tuple(
PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
+ case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_SIZE:
+ return {PrivateSegmentSize ? &PrivateSegmentSize : nullptr,
+ &AMDGPU::SGPR_32RegClass, LLT::scalar(32)};
case AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR:
return std::tuple(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
&AMDGPU::SGPR_64RegClass,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index 42b33c50d9f8c..2e02bb4271adc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -114,11 +114,12 @@ struct AMDGPUFunctionArgInfo {
PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
IMPLICIT_BUFFER_PTR = 15,
IMPLICIT_ARG_PTR = 16,
+ PRIVATE_SEGMENT_SIZE = 17,
// VGPRS:
- WORKITEM_ID_X = 17,
- WORKITEM_ID_Y = 18,
- WORKITEM_ID_Z = 19,
+ WORKITEM_ID_X = 18,
+ WORKITEM_ID_Y = 19,
+ WORKITEM_ID_Z = 20,
FIRST_VGPR_VALUE = WORKITEM_ID_X
};
// clang-format on
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index d8e22f4b0d8fa..bb85b03ebb5e9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -461,6 +461,10 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
}
+ if (UserSGPRInfo.hasPrivateSegmentSize()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
+ }
if (MF.getSubtarget<GCNSubtarget>().isWave32()) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
@@ -1397,6 +1401,9 @@ void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
if (UserSGPRInfo.hasFlatScratchInit())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
+ if (UserSGPRInfo.hasPrivateSegmentSize())
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
+
if (UserSGPRInfo.hasDispatchPtr())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 0751c8dc8b8bf..a8e26f104f588 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -1104,6 +1104,9 @@ GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo(const Function &F,
if (hasFlatScratchInit())
NumUsedUserSGPRs += getNumUserSGPRForField(FlatScratchInitID);
+
+ if (hasPrivateSegmentSize())
+ NumUsedUserSGPRs += getNumUserSGPRForField(PrivateSegmentSizeID);
}
void GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs(unsigned NumSGPRs) {
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 74c8f85f1b031..bb0746c5b5365 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1567,6 +1567,8 @@ class GCNUserSGPRUsageInfo {
bool hasFlatScratchInit() const { return FlatScratchInit; }
+ bool hasPrivateSegmentSize() const { return PrivateSegmentSize; }
+
unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
@@ -1631,6 +1633,8 @@ class GCNUserSGPRUsageInfo {
bool FlatScratchInit = false;
+ bool PrivateSegmentSize = false;
+
unsigned NumKernargPreloadSGPRs = 0;
unsigned NumUsedUserSGPRs = 0;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ac4cbd617b81d..d0b097d2b8feb 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2468,6 +2468,12 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
CCInfo.AllocateReg(FlatScratchInitReg);
}
+ if (UserSGPRInfo.hasPrivateSegmentSize()) {
+ Register PrivateSegmentSizeReg = Info.addPrivateSegmentSize(TRI);
+ MF.addLiveIn(PrivateSegmentSizeReg, &AMDGPU::SGPR_32RegClass);
+ CCInfo.AllocateReg(PrivateSegmentSizeReg);
+ }
+
// TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
// these from the dispatch pointer.
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 74f6fd64b473e..d9db0f7a4f531 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -232,6 +232,12 @@ Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
return ArgInfo.FlatScratchInit.getRegister();
}
+Register SIMachineFunctionInfo::addPrivateSegmentSize(const SIRegisterInfo &TRI) {
+ ArgInfo.PrivateSegmentSize = ArgDescriptor::createRegister(getNextUserSGPR());
+ NumUserSGPRs += 1;
+ return ArgInfo.PrivateSegmentSize.getRegister();
+}
+
Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 9fe02e24c8a15..7af5e7388f841 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -752,6 +752,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
Register addDispatchID(const SIRegisterInfo &TRI);
Register addFlatScratchInit(const SIRegisterInfo &TRI);
+ Register addPrivateSegmentSize(const SIRegisterInfo &TRI);
Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
Register addLDSKernelId();
SmallVectorImpl<MCRegister> *
More information about the llvm-commits
mailing list