[llvm] 77f8f81 - [AMDGPU] Return restricted number of regs from TTI
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 9 14:31:42 PDT 2020
Author: Stanislav Mekhanoshin
Date: 2020-07-09T14:31:28-07:00
New Revision: 77f8f813a9ae20152129a8ebb9fea5fcec859194
URL: https://github.com/llvm/llvm-project/commit/77f8f813a9ae20152129a8ebb9fea5fcec859194
DIFF: https://github.com/llvm/llvm-project/commit/77f8f813a9ae20152129a8ebb9fea5fcec859194.diff
LOG: [AMDGPU] Return restricted number of regs from TTI
This is practically NFC at the moment because nothing really
asks for the real number or does anything useful with it.
Differential Revision: https://reviews.llvm.org/D82202
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 24f079ffe929..8783427b5002 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -239,7 +239,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
unsigned GCNTTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
// The concept of vector registers doesn't really exist. Some packed vector
// operations operate on the normal 32-bit registers.
- return 256;
+ return MaxVGPRs;
}
unsigned GCNTTIImpl::getNumberOfRegisters(bool Vec) const {
@@ -248,6 +248,13 @@ unsigned GCNTTIImpl::getNumberOfRegisters(bool Vec) const {
return getHardwareNumberOfRegisters(Vec) >> 3;
}
+unsigned GCNTTIImpl::getNumberOfRegisters(unsigned RCID) const {
+ const SIRegisterInfo *TRI = ST->getRegisterInfo();
+ const TargetRegisterClass *RC = TRI->getRegClass(RCID);
+ unsigned NumVGPRs = (TRI->getRegSizeInBits(*RC) + 31) / 32;
+ return getHardwareNumberOfRegisters(false) / NumVGPRs;
+}
+
unsigned GCNTTIImpl::getRegisterBitWidth(bool Vector) const {
return 32;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 508ed061e935..b8a027c79bfc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -74,6 +74,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
AMDGPUTTIImpl CommonTTI;
bool IsGraphicsShader;
bool HasFP32Denormals;
+ unsigned MaxVGPRs;
const FeatureBitset InlineFeatureIgnoreList = {
// Codegen control options which don't matter.
@@ -133,7 +134,11 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
TLI(ST->getTargetLowering()),
CommonTTI(TM, F),
IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())),
- HasFP32Denormals(AMDGPU::SIModeRegisterDefaults(F).allFP32Denormals()) {}
+ HasFP32Denormals(AMDGPU::SIModeRegisterDefaults(F).allFP32Denormals()),
+ MaxVGPRs(ST->getMaxNumVGPRs(
+ std::max(ST->getWavesPerEU(F).first,
+ ST->getWavesPerEUForWorkGroup(
+ ST->getFlatWorkGroupSizes(F).second)))) {}
bool hasBranchDivergence() { return true; }
bool useGPUDivergenceAnalysis() const;
@@ -148,6 +153,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
unsigned getHardwareNumberOfRegisters(bool Vector) const;
unsigned getNumberOfRegisters(bool Vector) const;
+ unsigned getNumberOfRegisters(unsigned RCID) const;
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getMinVectorRegisterBitWidth() const;
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
More information about the llvm-commits
mailing list