[llvm] 343be51 - [AMDGPU] Add utilities to track number of user SGPRs. NFC.
Austin Kerbow via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 12 08:52:47 PDT 2023
Author: Austin Kerbow
Date: 2023-09-12T08:52:30-07:00
New Revision: 343be5132e2831d856de021593cb41a0595862d3
URL: https://github.com/llvm/llvm-project/commit/343be5132e2831d856de021593cb41a0595862d3
DIFF: https://github.com/llvm/llvm-project/commit/343be5132e2831d856de021593cb41a0595862d3.diff
LOG: [AMDGPU] Add utilities to track number of user SGPRs. NFC.
Factor out and unify some common code that calculates and tracks the
number of user SGPRs.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D159439
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/lib/Target/AMDGPU/GCNSubtarget.h
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 70045e2d313eeda..9718f6e9f949940 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -393,28 +393,29 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
const MachineFunction &MF) const {
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
uint16_t KernelCodeProperties = 0;
+ const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI.getUserSGPRInfo();
- if (MFI.hasPrivateSegmentBuffer()) {
+ if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
}
- if (MFI.hasDispatchPtr()) {
+ if (UserSGPRInfo.hasDispatchPtr()) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
}
- if (MFI.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5) {
+ if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
}
- if (MFI.hasKernargSegmentPtr()) {
+ if (UserSGPRInfo.hasKernargSegmentPtr()) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
}
- if (MFI.hasDispatchID()) {
+ if (UserSGPRInfo.hasDispatchID()) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
}
- if (MFI.hasFlatScratchInit()) {
+ if (UserSGPRInfo.hasFlatScratchInit()) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
}
@@ -1165,27 +1166,28 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));
- if (MFI->hasPrivateSegmentBuffer()) {
+ const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo();
+ if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
Out.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
}
- if (MFI->hasDispatchPtr())
+ if (UserSGPRInfo.hasDispatchPtr())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
- if (MFI->hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5)
+ if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5)
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
- if (MFI->hasKernargSegmentPtr())
+ if (UserSGPRInfo.hasKernargSegmentPtr())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
- if (MFI->hasDispatchID())
+ if (UserSGPRInfo.hasDispatchID())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
- if (MFI->hasFlatScratchInit())
+ if (UserSGPRInfo.hasFlatScratchInit())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
- if (MFI->hasDispatchPtr())
+ if (UserSGPRInfo.hasDispatchPtr())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (STM.isXNACKEnabled())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index a3bfeaa208b7045..db0f56416051f01 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -455,27 +455,28 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) {
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
- if (Info.hasPrivateSegmentBuffer()) {
+ const GCNUserSGPRUsageInfo &UserSGPRInfo = Info.getUserSGPRInfo();
+ if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
CCInfo.AllocateReg(PrivateSegmentBufferReg);
}
- if (Info.hasDispatchPtr()) {
+ if (UserSGPRInfo.hasDispatchPtr()) {
Register DispatchPtrReg = Info.addDispatchPtr(TRI);
MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchPtrReg);
}
const Module *M = MF.getFunction().getParent();
- if (Info.hasQueuePtr() &&
+ if (UserSGPRInfo.hasQueuePtr() &&
AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) {
Register QueuePtrReg = Info.addQueuePtr(TRI);
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(QueuePtrReg);
}
- if (Info.hasKernargSegmentPtr()) {
+ if (UserSGPRInfo.hasKernargSegmentPtr()) {
MachineRegisterInfo &MRI = MF.getRegInfo();
Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
@@ -486,13 +487,13 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
CCInfo.AllocateReg(InputPtrReg);
}
- if (Info.hasDispatchID()) {
+ if (UserSGPRInfo.hasDispatchID()) {
Register DispatchIDReg = Info.addDispatchID(TRI);
MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchIDReg);
}
- if (Info.hasFlatScratchInit()) {
+ if (UserSGPRInfo.hasFlatScratchInit()) {
Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(FlatScratchInitReg);
@@ -597,15 +598,16 @@ bool AMDGPUCallLowering::lowerFormalArguments(
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
+ const GCNUserSGPRUsageInfo &UserSGPRInfo = Info->getUserSGPRInfo();
- if (Info->hasImplicitBufferPtr()) {
+ if (UserSGPRInfo.hasImplicitBufferPtr()) {
Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(ImplicitBufferPtrReg);
}
// FIXME: This probably isn't defined for mesa
- if (Info->hasFlatScratchInit() && !Subtarget.isAmdPalOS()) {
+ if (UserSGPRInfo.hasFlatScratchInit() && !Subtarget.isAmdPalOS()) {
Register FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(FlatScratchInitReg);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 9c7d1ac183188fe..5f2b4d4cca5d2f6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -1103,7 +1103,7 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
Offset += 8; // Skipped.
}
- if (MFI.hasQueuePtr())
+ if (MFI.getUserSGPRInfo().hasQueuePtr())
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index 804bf503e4f9f9e..db5d2bbcf5bbc71 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -185,7 +185,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
//
// If we only have implicit uses of flat_scr on flat instructions, it is not
// really needed.
- if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() &&
+ if (Info.UsesFlatScratch && !MFI->getUserSGPRInfo().hasFlatScratchInit() &&
(!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 9b50f4fa53ac5e2..c62ee841902c4d6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -17,6 +17,7 @@
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "GCNSubtarget.h"
#include "R600Subtarget.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
@@ -692,7 +693,7 @@ GCNSubtarget::getBaseReservedNumSGPRs(const bool HasFlatScratch) const {
unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
- return getBaseReservedNumSGPRs(MFI.hasFlatScratchInit());
+ return getBaseReservedNumSGPRs(MFI.getUserSGPRInfo().hasFlatScratchInit());
}
unsigned GCNSubtarget::getReservedNumSGPRs(const Function &F) const {
@@ -770,25 +771,27 @@ unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
getReservedNumSGPRs(MF));
}
-static unsigned getMaxNumPreloadedSGPRs() {
+static constexpr unsigned getMaxNumPreloadedSGPRs() {
+ using USI = GCNUserSGPRUsageInfo;
// Max number of user SGPRs
- unsigned MaxUserSGPRs = 4 + // private segment buffer
- 2 + // Dispatch ptr
- 2 + // queue ptr
- 2 + // kernel segment ptr
- 2 + // dispatch ID
- 2 + // flat scratch init
- 2; // Implicit buffer ptr
+ const unsigned MaxUserSGPRs =
+ USI::getNumUserSGPRForField(USI::PrivateSegmentBufferID) +
+ USI::getNumUserSGPRForField(USI::DispatchPtrID) +
+ USI::getNumUserSGPRForField(USI::QueuePtrID) +
+ USI::getNumUserSGPRForField(USI::KernargSegmentPtrID) +
+ USI::getNumUserSGPRForField(USI::DispatchIdID) +
+ USI::getNumUserSGPRForField(USI::FlatScratchInitID) +
+ USI::getNumUserSGPRForField(USI::ImplicitBufferPtrID);
// Max number of system SGPRs
- unsigned MaxSystemSGPRs = 1 + // WorkGroupIDX
- 1 + // WorkGroupIDY
- 1 + // WorkGroupIDZ
- 1 + // WorkGroupInfo
- 1; // private segment wave byte offset
+ const unsigned MaxSystemSGPRs = 1 + // WorkGroupIDX
+ 1 + // WorkGroupIDY
+ 1 + // WorkGroupIDZ
+ 1 + // WorkGroupInfo
+ 1; // private segment wave byte offset
// Max number of synthetic SGPRs
- unsigned SyntheticSGPRs = 1; // LDSKernelId
+ const unsigned SyntheticSGPRs = 1; // LDSKernelId
return MaxUserSGPRs + MaxSystemSGPRs + SyntheticSGPRs;
}
@@ -1018,3 +1021,73 @@ const AMDGPUSubtarget &AMDGPUSubtarget::get(const TargetMachine &TM, const Funct
else
return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<R600Subtarget>(F));
}
+
+GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo(const Function &F,
+ const GCNSubtarget &ST) {
+ const CallingConv::ID CC = F.getCallingConv();
+ const bool IsKernel =
+ CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL;
+ // FIXME: Should have analysis or something rather than attribute to detect
+ // calls.
+ const bool HasCalls = F.hasFnAttribute("amdgpu-calls");
+ // FIXME: This attribute is a hack, we just need an analysis on the function
+ // to look for allocas.
+ const bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
+
+ if (IsKernel && (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0))
+ KernargSegmentPtr = true;
+
+ bool IsAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
+ if (IsAmdHsaOrMesa && !ST.enableFlatScratch())
+ PrivateSegmentBuffer = true;
+ else if (ST.isMesaGfxShader(F))
+ ImplicitBufferPtr = true;
+
+ if (!AMDGPU::isGraphics(CC)) {
+ if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
+ DispatchPtr = true;
+
+ // FIXME: Can this always be disabled with < COv5?
+ if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
+ QueuePtr = true;
+
+ if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
+ DispatchID = true;
+ }
+
+ // TODO: This could be refined a lot. The attribute is a poor way of
+ // detecting calls or stack objects that may require it before argument
+ // lowering.
+ if (ST.hasFlatAddressSpace() && AMDGPU::isEntryFunctionCC(CC) &&
+ (IsAmdHsaOrMesa || ST.enableFlatScratch()) &&
+ (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
+ !ST.flatScratchIsArchitected()) {
+ FlatScratchInit = true;
+ }
+}
+
+unsigned GCNUserSGPRUsageInfo::getNumUsedUserSGPRs() const {
+ unsigned NumUserSGPRs = 0;
+ if (hasImplicitBufferPtr())
+ NumUserSGPRs += getNumUserSGPRForField(ImplicitBufferPtrID);
+
+ if (hasPrivateSegmentBuffer())
+ NumUserSGPRs += getNumUserSGPRForField(PrivateSegmentBufferID);
+
+ if (hasDispatchPtr())
+ NumUserSGPRs += getNumUserSGPRForField(DispatchPtrID);
+
+ if (hasQueuePtr())
+ NumUserSGPRs += getNumUserSGPRForField(QueuePtrID);
+
+ if (hasKernargSegmentPtr())
+ NumUserSGPRs += getNumUserSGPRForField(KernargSegmentPtrID);
+
+ if (hasDispatchID())
+ NumUserSGPRs += getNumUserSGPRForField(DispatchIdID);
+
+ if (hasFlatScratchInit())
+ NumUserSGPRs += getNumUserSGPRForField(FlatScratchInitID);
+
+ return NumUserSGPRs;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 425b40f4bd9a23b..0a4c4d62c6a875f 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -22,6 +22,7 @@
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#define GET_SUBTARGETINFO_HEADER
#include "AMDGPUGenSubtargetInfo.inc"
@@ -1378,6 +1379,79 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
}
};
+class GCNUserSGPRUsageInfo {
+public:
+ unsigned getNumUsedUserSGPRs() const;
+
+ bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
+
+ bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
+
+ bool hasDispatchPtr() const { return DispatchPtr; }
+
+ bool hasQueuePtr() const { return QueuePtr; }
+
+ bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
+
+ bool hasDispatchID() const { return DispatchID; }
+
+ bool hasFlatScratchInit() const { return FlatScratchInit; }
+
+ enum UserSGPRID : unsigned {
+ ImplicitBufferPtrID = 0,
+ PrivateSegmentBufferID = 1,
+ DispatchPtrID = 2,
+ QueuePtrID = 3,
+ KernargSegmentPtrID = 4,
+ DispatchIdID = 5,
+ FlatScratchInitID = 6,
+ PrivateSegmentSizeID = 7
+ };
+
+ // Returns the size in number of SGPRs for preload user SGPR field.
+ static constexpr unsigned getNumUserSGPRForField(UserSGPRID ID) {
+ switch (ID) {
+ case ImplicitBufferPtrID:
+ return 2;
+ case PrivateSegmentBufferID:
+ return 4;
+ case DispatchPtrID:
+ return 2;
+ case QueuePtrID:
+ return 2;
+ case KernargSegmentPtrID:
+ return 2;
+ case DispatchIdID:
+ return 2;
+ case FlatScratchInitID:
+ return 2;
+ case PrivateSegmentSizeID:
+ return 1;
+ }
+ llvm_unreachable("Unknown UserSGPRID.");
+ }
+
+ GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);
+
+private:
+ // Private memory buffer
+ // Compute directly in sgpr[0:1]
+ // Other shaders indirect 64-bits at sgpr[0:1]
+ bool ImplicitBufferPtr = false;
+
+ bool PrivateSegmentBuffer = false;
+
+ bool DispatchPtr = false;
+
+ bool QueuePtr = false;
+
+ bool KernargSegmentPtr = false;
+
+ bool DispatchID = false;
+
+ bool FlatScratchInit = false;
+};
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 865caae240f3470..d09a432d38c0e38 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -692,7 +692,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
}
bool NeedsFlatScratchInit =
- MFI->hasFlatScratchInit() &&
+ MFI->getUserSGPRInfo().hasFlatScratchInit() &&
(MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
(!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));
@@ -775,7 +775,7 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
// Use relocations to get the pointer, and setup the other bits manually.
uint64_t Rsrc23 = TII->getScratchRsrcWords23();
- if (MFI->hasImplicitBufferPtr()) {
+ if (MFI->getUserSGPRInfo().hasImplicitBufferPtr()) {
Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
@@ -814,7 +814,6 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
BuildMI(MBB, I, DL, SMovB32, Rsrc1)
.addExternalSymbol("SCRATCH_RSRC_DWORD1")
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
}
BuildMI(MBB, I, DL, SMovB32, Rsrc2)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1020878955f7fce..683fb5b2cd0b211 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -15,6 +15,7 @@
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
@@ -2143,13 +2144,14 @@ void SITargetLowering::allocateSpecialInputSGPRs(
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) const {
auto &ArgInfo = Info.getArgInfo();
+ const GCNUserSGPRUsageInfo &UserSGPRInfo = Info.getUserSGPRInfo();
// TODO: Unify handling with private memory pointers.
- if (Info.hasDispatchPtr())
+ if (UserSGPRInfo.hasDispatchPtr())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr);
const Module *M = MF.getFunction().getParent();
- if (Info.hasQueuePtr() &&
+ if (UserSGPRInfo.hasQueuePtr() &&
AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5)
allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr);
@@ -2158,7 +2160,7 @@ void SITargetLowering::allocateSpecialInputSGPRs(
if (Info.hasImplicitArgPtr())
allocateSGPR64Input(CCInfo, ArgInfo.ImplicitArgPtr);
- if (Info.hasDispatchID())
+ if (UserSGPRInfo.hasDispatchID())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchID);
// flat_scratch_init is not applicable for non-kernel functions.
@@ -2181,34 +2183,35 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
MachineFunction &MF,
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) const {
- if (Info.hasImplicitBufferPtr()) {
+ const GCNUserSGPRUsageInfo &UserSGPRInfo = Info.getUserSGPRInfo();
+ if (UserSGPRInfo.hasImplicitBufferPtr()) {
Register ImplicitBufferPtrReg = Info.addImplicitBufferPtr(TRI);
MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(ImplicitBufferPtrReg);
}
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
- if (Info.hasPrivateSegmentBuffer()) {
+ if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
CCInfo.AllocateReg(PrivateSegmentBufferReg);
}
- if (Info.hasDispatchPtr()) {
+ if (UserSGPRInfo.hasDispatchPtr()) {
Register DispatchPtrReg = Info.addDispatchPtr(TRI);
MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchPtrReg);
}
const Module *M = MF.getFunction().getParent();
- if (Info.hasQueuePtr() &&
+ if (UserSGPRInfo.hasQueuePtr() &&
AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) {
Register QueuePtrReg = Info.addQueuePtr(TRI);
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(QueuePtrReg);
}
- if (Info.hasKernargSegmentPtr()) {
+ if (UserSGPRInfo.hasKernargSegmentPtr()) {
MachineRegisterInfo &MRI = MF.getRegInfo();
Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
CCInfo.AllocateReg(InputPtrReg);
@@ -2217,13 +2220,13 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
MRI.setType(VReg, LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
}
- if (Info.hasDispatchID()) {
+ if (UserSGPRInfo.hasDispatchID()) {
Register DispatchIDReg = Info.addDispatchID(TRI);
MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchIDReg);
}
- if (Info.hasFlatScratchInit() && !getSubtarget()->isAmdPalOS()) {
+ if (UserSGPRInfo.hasFlatScratchInit() && !getSubtarget()->isAmdPalOS()) {
Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(FlatScratchInitReg);
@@ -2487,12 +2490,13 @@ SDValue SITargetLowering::LowerFormalArguments(
bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CallConv);
if (IsGraphics) {
- assert(!Info->hasDispatchPtr() && !Info->hasKernargSegmentPtr() &&
- !Info->hasWorkGroupInfo() && !Info->hasLDSKernelId() &&
- !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() &&
- !Info->hasWorkItemIDZ());
+ const GCNUserSGPRUsageInfo &UserSGPRInfo = Info->getUserSGPRInfo();
+ assert(!UserSGPRInfo.hasDispatchPtr() &&
+ !UserSGPRInfo.hasKernargSegmentPtr() && !Info->hasWorkGroupInfo() &&
+ !Info->hasLDSKernelId() && !Info->hasWorkItemIDX() &&
+ !Info->hasWorkItemIDY() && !Info->hasWorkItemIDZ());
if (!Subtarget->enableFlatScratch())
- assert(!Info->hasFlatScratchInit());
+ assert(!UserSGPRInfo.hasFlatScratchInit());
if (CallConv != CallingConv::AMDGPU_CS || !Subtarget->hasArchitectedSGPRs())
assert(!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
!Info->hasWorkGroupIDZ());
@@ -9151,7 +9155,7 @@ static bool addressMayBeAccessedAsPrivate(const MachineMemOperand *MMO,
const SIMachineFunctionInfo &Info) {
// TODO: Should check if the address can definitely not access stack.
if (Info.isEntryFunction())
- return Info.hasFlatScratchInit();
+ return Info.getUserSGPRInfo().hasFlatScratchInit();
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 2d0dd2bd7b2f8a8..bbee6d77733b52b 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -7,17 +7,18 @@
//===----------------------------------------------------------------------===//
#include "SIMachineFunctionInfo.h"
-#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
-#include "SIRegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
@@ -36,28 +37,12 @@ const GCNTargetMachine &getTM(const GCNSubtarget *STI) {
SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
const GCNSubtarget *STI)
- : AMDGPUMachineFunction(F, *STI),
- Mode(F),
- GWSResourcePSV(getTM(STI)),
- PrivateSegmentBuffer(false),
- DispatchPtr(false),
- QueuePtr(false),
- KernargSegmentPtr(false),
- DispatchID(false),
- FlatScratchInit(false),
- WorkGroupIDX(false),
- WorkGroupIDY(false),
- WorkGroupIDZ(false),
- WorkGroupInfo(false),
- LDSKernelId(false),
- PrivateSegmentWaveByteOffset(false),
- WorkItemIDX(false),
- WorkItemIDY(false),
- WorkItemIDZ(false),
- ImplicitBufferPtr(false),
- ImplicitArgPtr(false),
- GITPtrHigh(0xffffffff),
- HighBitsOf32BitAddress(0) {
+ : AMDGPUMachineFunction(F, *STI), Mode(F), GWSResourcePSV(getTM(STI)),
+ UserSGPRInfo(F, *STI), WorkGroupIDX(false), WorkGroupIDY(false),
+ WorkGroupIDZ(false), WorkGroupInfo(false), LDSKernelId(false),
+ PrivateSegmentWaveByteOffset(false), WorkItemIDX(false),
+ WorkItemIDY(false), WorkItemIDZ(false), ImplicitArgPtr(false),
+ GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0) {
const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI);
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
WavesPerEU = ST.getWavesPerEU(F);
@@ -67,16 +52,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
VRegFlags.reserve(1024);
- // FIXME: Should have analysis or something rather than attribute to detect
- // calls.
- const bool HasCalls = F.hasFnAttribute("amdgpu-calls");
-
const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
CC == CallingConv::SPIR_KERNEL;
if (IsKernel) {
- if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
- KernargSegmentPtr = true;
WorkGroupIDX = true;
WorkItemIDX = true;
} else if (CC == CallingConv::AMDGPU_PS) {
@@ -128,12 +107,6 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
}
- bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
- if (isAmdHsaOrMesa && !ST.enableFlatScratch())
- PrivateSegmentBuffer = true;
- else if (ST.isMesaGfxShader(F))
- ImplicitBufferPtr = true;
-
if (!AMDGPU::isGraphics(CC) ||
(CC == CallingConv::AMDGPU_CS && ST.hasArchitectedSGPRs())) {
if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
@@ -158,33 +131,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
ST.getMaxWorkitemID(F, 2) != 0)
WorkItemIDZ = true;
- if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
- DispatchPtr = true;
-
- if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
- QueuePtr = true;
-
- if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
- DispatchID = true;
-
if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id"))
LDSKernelId = true;
}
- // FIXME: This attribute is a hack, we just need an analysis on the function
- // to look for allocas.
- bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
-
- // TODO: This could be refined a lot. The attribute is a poor way of
- // detecting calls or stack objects that may require it before argument
- // lowering.
- if (ST.hasFlatAddressSpace() && isEntryFunction() &&
- (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
- (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
- !ST.flatScratchIsArchitected()) {
- FlatScratchInit = true;
- }
-
if (isEntryFunction()) {
// X, XY, and XYZ are the only supported combinations, so make sure Y is
// enabled if Z is.
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 4aa9a2fc0bb386a..51d5bab7a142961 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -16,6 +16,7 @@
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
#include "AMDGPUTargetMachine.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIModeRegisterDefaults.h"
@@ -436,13 +437,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
unsigned NumSpilledSGPRs = 0;
unsigned NumSpilledVGPRs = 0;
- // Feature bits required for inputs passed in user SGPRs.
- bool PrivateSegmentBuffer : 1;
- bool DispatchPtr : 1;
- bool QueuePtr : 1;
- bool KernargSegmentPtr : 1;
- bool DispatchID : 1;
- bool FlatScratchInit : 1;
+ // Tracks information about user SGPRs that will be setup by hardware which
+ // will apply to all wavefronts of the grid.
+ GCNUserSGPRUsageInfo UserSGPRInfo;
// Feature bits required for inputs passed in system SGPRs.
bool WorkGroupIDX : 1; // Always initialized.
@@ -456,11 +453,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
bool WorkItemIDY : 1;
bool WorkItemIDZ : 1;
- // Private memory buffer
- // Compute directly in sgpr[0:1]
- // Other shaders indirect 64-bits at sgpr[0:1]
- bool ImplicitBufferPtr : 1;
-
// Pointer to where the ABI inserts special kernel arguments separate from the
// user arguments. This is an offset from the KernargSegmentPtr.
bool ImplicitArgPtr : 1;
@@ -601,6 +593,8 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
return PrologEpilogSGPRSpills;
}
+ const GCNUserSGPRUsageInfo &getUserSGPRInfo() const { return UserSGPRInfo; }
+
void addToPrologEpilogSGPRSpills(Register Reg,
PrologEpilogSGPRSaveRestoreInfo SI) {
PrologEpilogSGPRSpills.insert(std::make_pair(Reg, SI));
@@ -780,6 +774,8 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
return ArgInfo.WorkGroupInfo.getRegister();
}
+ bool hasLDSKernelId() const { return LDSKernelId; }
+
// Add special VGPR inputs
void setWorkItemIDX(ArgDescriptor Arg) {
ArgInfo.WorkItemIDX = Arg;
@@ -804,30 +800,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
}
- bool hasPrivateSegmentBuffer() const {
- return PrivateSegmentBuffer;
- }
-
- bool hasDispatchPtr() const {
- return DispatchPtr;
- }
-
- bool hasQueuePtr() const {
- return QueuePtr;
- }
-
- bool hasKernargSegmentPtr() const {
- return KernargSegmentPtr;
- }
-
- bool hasDispatchID() const {
- return DispatchID;
- }
-
- bool hasFlatScratchInit() const {
- return FlatScratchInit;
- }
-
bool hasWorkGroupIDX() const {
return WorkGroupIDX;
}
@@ -844,8 +816,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
return WorkGroupInfo;
}
- bool hasLDSKernelId() const { return LDSKernelId; }
-
bool hasPrivateSegmentWaveByteOffset() const {
return PrivateSegmentWaveByteOffset;
}
@@ -866,10 +836,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
return ImplicitArgPtr;
}
- bool hasImplicitBufferPtr() const {
- return ImplicitBufferPtr;
- }
-
AMDGPUFunctionArgInfo &getArgInfo() {
return ArgInfo;
}
More information about the llvm-commits
mailing list