[llvm] r373411 - [AMDGPU] separate accounting for agprs
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 1 17:26:58 PDT 2019
Author: rampitec
Date: Tue Oct 1 17:26:58 2019
New Revision: 373411
URL: http://llvm.org/viewvc/llvm-project?rev=373411&view=rev
Log:
[AMDGPU] separate accounting for agprs
Account for and report AGPRs separately on gfx908. Reporting on other
targets is unchanged.
Differential Revision: https://reviews.llvm.org/D68307
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
llvm/trunk/lib/Target/AMDGPU/SIProgramInfo.h
llvm/trunk/test/CodeGen/AMDGPU/agpr-register-count.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=373411&r1=373410&r2=373411&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Tue Oct 1 17:26:58 2019
@@ -342,6 +342,8 @@ bool AMDGPUAsmPrinter::doFinalization(Mo
// Print comments that apply to both callable functions and entry points.
void AMDGPUAsmPrinter::emitCommonFunctionComments(
uint32_t NumVGPR,
+ Optional<uint32_t> NumAGPR,
+ uint32_t TotalNumVGPR,
uint32_t NumSGPR,
uint64_t ScratchSize,
uint64_t CodeSize,
@@ -349,6 +351,11 @@ void AMDGPUAsmPrinter::emitCommonFunctio
OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false);
OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false);
+ if (NumAGPR) {
+ OutStreamer->emitRawComment(" NumAgprs: " + Twine(*NumAGPR), false);
+ OutStreamer->emitRawComment(" TotalNumVgprs: " + Twine(TotalNumVGPR),
+ false);
+ }
OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false);
OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),
false);
@@ -474,6 +481,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunct
SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()];
emitCommonFunctionComments(
Info.NumVGPR,
+ STM.hasMAIInsts() ? Info.NumAGPR : Optional<uint32_t>(),
+ Info.getTotalNumVGPRs(STM),
Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
Info.PrivateSegmentSize,
getFunctionCodeSize(MF), MFI);
@@ -481,7 +490,11 @@ bool AMDGPUAsmPrinter::runOnMachineFunct
}
OutStreamer->emitRawComment(" Kernel info:", false);
- emitCommonFunctionComments(CurrentProgramInfo.NumVGPR,
+ emitCommonFunctionComments(CurrentProgramInfo.NumArchVGPR,
+ STM.hasMAIInsts()
+ ? CurrentProgramInfo.NumAccVGPR
+ : Optional<uint32_t>(),
+ CurrentProgramInfo.NumVGPR,
CurrentProgramInfo.NumSGPR,
CurrentProgramInfo.ScratchSize,
getFunctionCodeSize(MF), MFI);
@@ -592,6 +605,11 @@ int32_t AMDGPUAsmPrinter::SIFunctionReso
UsesVCC, UsesFlatScratch);
}
+int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumVGPRs(
+ const GCNSubtarget &ST) const {
+ return std::max(NumVGPR, NumAGPR);
+}
+
AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
const MachineFunction &MF) const {
SIFunctionResourceInfo Info;
@@ -638,11 +656,18 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
HighestVGPRReg = Reg;
break;
}
- MCPhysReg AReg = AMDGPU::AGPR0 + TRI.getHWRegIndex(Reg);
- if (MRI.isPhysRegUsed(AReg)) {
- HighestVGPRReg = AReg;
- break;
+ }
+
+ if (ST.hasMAIInsts()) {
+ MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
+ for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
+ if (MRI.isPhysRegUsed(Reg)) {
+ HighestAGPRReg = Reg;
+ break;
+ }
}
+ Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister ? 0 :
+ TRI.getHWRegIndex(HighestAGPRReg) + 1;
}
MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
@@ -664,6 +689,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
}
int32_t MaxVGPR = -1;
+ int32_t MaxAGPR = -1;
int32_t MaxSGPR = -1;
uint64_t CalleeFrameSize = 0;
@@ -673,6 +699,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
for (const MachineOperand &MO : MI.operands()) {
unsigned Width = 0;
bool IsSGPR = false;
+ bool IsAGPR = false;
if (!MO.isReg())
continue;
@@ -748,6 +775,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
Width = 1;
} else if (AMDGPU::AGPR_32RegClass.contains(Reg)) {
IsSGPR = false;
+ IsAGPR = true;
Width = 1;
} else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
@@ -759,6 +787,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
Width = 2;
} else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
IsSGPR = false;
+ IsAGPR = true;
Width = 2;
} else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
IsSGPR = false;
@@ -775,6 +804,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
Width = 4;
} else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
IsSGPR = false;
+ IsAGPR = true;
Width = 4;
} else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
@@ -794,6 +824,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
Width = 16;
} else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
IsSGPR = false;
+ IsAGPR = true;
Width = 16;
} else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
IsSGPR = true;
@@ -803,6 +834,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
Width = 32;
} else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
IsSGPR = false;
+ IsAGPR = true;
Width = 32;
} else {
llvm_unreachable("Unknown register class");
@@ -811,6 +843,8 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
int MaxUsed = HWReg + Width - 1;
if (IsSGPR) {
MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
+ } else if (IsAGPR) {
+ MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
} else {
MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
}
@@ -832,6 +866,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
47 - IsaInfo::getNumExtraSGPRs(&ST, true, ST.hasFlatAddressSpace());
MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
MaxVGPR = std::max(MaxVGPR, 23);
+ MaxAGPR = std::max(MaxAGPR, 23);
CalleeFrameSize = std::max(CalleeFrameSize, UINT64_C(16384));
Info.UsesVCC = true;
@@ -856,6 +891,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
+ MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
CalleeFrameSize
= std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
Info.UsesVCC |= I->second.UsesVCC;
@@ -872,6 +908,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
Info.NumExplicitSGPR = MaxSGPR + 1;
Info.NumVGPR = MaxVGPR + 1;
+ Info.NumAGPR = MaxAGPR + 1;
Info.PrivateSegmentSize += CalleeFrameSize;
return Info;
@@ -880,8 +917,11 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
const MachineFunction &MF) {
SIFunctionResourceInfo Info = analyzeResourceUsage(MF);
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
- ProgInfo.NumVGPR = Info.NumVGPR;
+ ProgInfo.NumArchVGPR = Info.NumVGPR;
+ ProgInfo.NumAccVGPR = Info.NumAGPR;
+ ProgInfo.NumVGPR = Info.getTotalNumVGPRs(STM);
ProgInfo.NumSGPR = Info.NumExplicitSGPR;
ProgInfo.ScratchSize = Info.PrivateSegmentSize;
ProgInfo.VCCUsed = Info.UsesVCC;
@@ -894,7 +934,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
MF.getFunction().getContext().diagnose(DiagStackSize);
}
- const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// TODO(scott.linder): The calculations related to SGPR/VGPR blocks are
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h?rev=373411&r1=373410&r2=373411&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h Tue Oct 1 17:26:58 2019
@@ -43,6 +43,7 @@ private:
// Track the number of explicitly used VGPRs. Special registers reserved at
// the end are tracked separately.
int32_t NumVGPR = 0;
+ int32_t NumAGPR = 0;
int32_t NumExplicitSGPR = 0;
uint64_t PrivateSegmentSize = 0;
bool UsesVCC = false;
@@ -51,6 +52,7 @@ private:
bool HasRecursion = false;
int32_t getTotalNumSGPRs(const GCNSubtarget &ST) const;
+ int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const;
};
SIProgramInfo CurrentProgramInfo;
@@ -77,6 +79,8 @@ private:
void EmitPALMetadata(const MachineFunction &MF,
const SIProgramInfo &KernelInfo);
void emitCommonFunctionComments(uint32_t NumVGPR,
+ Optional<uint32_t> NumAGPR,
+ uint32_t TotalNumVGPR,
uint32_t NumSGPR,
uint64_t ScratchSize,
uint64_t CodeSize,
Modified: llvm/trunk/lib/Target/AMDGPU/SIProgramInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIProgramInfo.h?rev=373411&r1=373410&r2=373411&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIProgramInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIProgramInfo.h Tue Oct 1 17:26:58 2019
@@ -41,6 +41,8 @@ struct SIProgramInfo {
uint64_t ComputePGMRSrc2 = 0;
uint32_t NumVGPR = 0;
+ uint32_t NumArchVGPR = 0;
+ uint32_t NumAccVGPR = 0;
uint32_t NumSGPR = 0;
uint32_t LDSSize = 0;
bool FlatUsed = false;
Modified: llvm/trunk/test/CodeGen/AMDGPU/agpr-register-count.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/agpr-register-count.ll?rev=373411&r1=373410&r2=373411&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/agpr-register-count.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/agpr-register-count.ll Tue Oct 1 17:26:58 2019
@@ -1,15 +1,134 @@
-; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
+; GCN-LABEL: {{^}}kernel_32_agprs:
+; GCN: .amdhsa_next_free_vgpr 32
+; GCN: NumVgprs: 9
+; GCN: NumAgprs: 32
+; GCN: TotalNumVgprs: 32
+; GCN: VGPRBlocks: 7
+; GCN: NumVGPRsForWavesPerEU: 32
+; GCN: Occupancy: 8
+define amdgpu_kernel void @kernel_32_agprs() {
+bb:
+ call void asm sideeffect "", "~{v8}" ()
+ call void asm sideeffect "", "~{a31}" ()
+ ret void
+}
+
+; GCN-LABEL: {{^}}kernel_0_agprs:
+; GCN: .amdhsa_next_free_vgpr 1
+; GCN: NumVgprs: 1
+; GCN: NumAgprs: 0
+; GCN: TotalNumVgprs: 1
+; GCN: VGPRBlocks: 0
+; GCN: NumVGPRsForWavesPerEU: 1
+; GCN: Occupancy: 10
+define amdgpu_kernel void @kernel_0_agprs() {
+bb:
+ call void asm sideeffect "", "~{v0}" ()
+ ret void
+}
+
+; GCN-LABEL: {{^}}kernel_40_vgprs:
+; GCN: .amdhsa_next_free_vgpr 40
+; GCN: NumVgprs: 40
+; GCN: NumAgprs: 16
+; GCN: TotalNumVgprs: 40
+; GCN: VGPRBlocks: 9
+; GCN: NumVGPRsForWavesPerEU: 40
+; GCN: Occupancy: 6
+define amdgpu_kernel void @kernel_40_vgprs() {
+bb:
+ call void asm sideeffect "", "~{v39}" ()
+ call void asm sideeffect "", "~{a15}" ()
+ ret void
+}
+
+; GCN-LABEL: {{^}}func_32_agprs:
+; GCN: NumVgprs: 9
+; GCN: NumAgprs: 32
+; GCN: TotalNumVgprs: 32
+define void @func_32_agprs() #0 {
+bb:
+ call void asm sideeffect "", "~{v8}" ()
+ call void asm sideeffect "", "~{a31}" ()
+ ret void
+}
+
+; GCN-LABEL: {{^}}func_32_vgprs:
+; GCN: NumVgprs: 32
+; GCN: NumAgprs: 9
+; GCN: TotalNumVgprs: 32
+define void @func_32_vgprs() {
+bb:
+ call void asm sideeffect "", "~{v31}" ()
+ call void asm sideeffect "", "~{a8}" ()
+ ret void
+}
-; GCN-LABEL: {{^}}test_32_agprs:
-; GCN: v_mfma_f32_32x32x1f32 a[0:31], {{v[0-9]+}}, {{v[0-9]+}},
-; GCN-NOT: v28
-; GCN: NumVgprs: 32
-; GCN: VGPRBlocks: 7
-define amdgpu_kernel void @test_32_agprs(<32 x float> addrspace(1)* %arg) {
+; GCN-LABEL: {{^}}func_0_agprs:
+; GCN: NumVgprs: 1
+; GCN: NumAgprs: 0
+; GCN: TotalNumVgprs: 1
+define amdgpu_kernel void @func_0_agprs() {
bb:
- %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> <float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, i32 0, i32 0, i32 0)
- store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
+ call void asm sideeffect "", "~{v0}" ()
ret void
}
+
+; GCN-LABEL: {{^}}kernel_max_gprs:
+; GCN: .amdhsa_next_free_vgpr 256
+; GCN: NumVgprs: 256
+; GCN: NumAgprs: 256
+; GCN: TotalNumVgprs: 256
+; GCN: VGPRBlocks: 63
+; GCN: NumVGPRsForWavesPerEU: 256
+; GCN: Occupancy: 1
+define amdgpu_kernel void @kernel_max_gprs() {
+bb:
+ call void asm sideeffect "", "~{v255}" ()
+ call void asm sideeffect "", "~{a255}" ()
+ ret void
+}
+
+; GCN-LABEL: {{^}}kernel_call_func_32_agprs:
+; GCN: .amdhsa_next_free_vgpr 32
+; GCN: NumVgprs: 9
+; GCN: NumAgprs: 32
+; GCN: TotalNumVgprs: 32
+; GCN: VGPRBlocks: 7
+; GCN: NumVGPRsForWavesPerEU: 32
+; GCN: Occupancy: 8
+define amdgpu_kernel void @kernel_call_func_32_agprs() {
+bb:
+ call void @func_32_agprs() #0
+ ret void
+}
+
+; GCN-LABEL: {{^}}func_call_func_32_agprs:
+; GCN: NumVgprs: 9
+; GCN: NumAgprs: 32
+; GCN: TotalNumVgprs: 32
+define void @func_call_func_32_agprs() {
+bb:
+ call void @func_32_agprs() #0
+ ret void
+}
+
+declare void @undef_func()
+
+; GCN-LABEL: {{^}}kernel_call_undef_func:
+; GCN: .amdhsa_next_free_vgpr 24
+; GCN: NumVgprs: 24
+; GCN: NumAgprs: 24
+; GCN: TotalNumVgprs: 24
+; GCN: VGPRBlocks: 5
+; GCN: NumVGPRsForWavesPerEU: 24
+; GCN: Occupancy: 10
+define amdgpu_kernel void @kernel_call_undef_func() {
+bb:
+ call void @undef_func()
+ ret void
+}
+
+attributes #0 = { nounwind noinline }
More information about the llvm-commits
mailing list