[llvm] r373411 - [AMDGPU] separate accounting for agprs

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 1 17:26:58 PDT 2019


Author: rampitec
Date: Tue Oct  1 17:26:58 2019
New Revision: 373411

URL: http://llvm.org/viewvc/llvm-project?rev=373411&view=rev
Log:
[AMDGPU] separate accounting for agprs

Account and report agprs separately on gfx908. Other targets
do not change the reporting.

Differential Revision: https://reviews.llvm.org/D68307

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
    llvm/trunk/lib/Target/AMDGPU/SIProgramInfo.h
    llvm/trunk/test/CodeGen/AMDGPU/agpr-register-count.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=373411&r1=373410&r2=373411&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Tue Oct  1 17:26:58 2019
@@ -342,6 +342,8 @@ bool AMDGPUAsmPrinter::doFinalization(Mo
 // Print comments that apply to both callable functions and entry points.
 void AMDGPUAsmPrinter::emitCommonFunctionComments(
   uint32_t NumVGPR,
+  Optional<uint32_t> NumAGPR,
+  uint32_t TotalNumVGPR,
   uint32_t NumSGPR,
   uint64_t ScratchSize,
   uint64_t CodeSize,
@@ -349,6 +351,11 @@ void AMDGPUAsmPrinter::emitCommonFunctio
   OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
   OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false);
   OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false);
+  if (NumAGPR) {
+    OutStreamer->emitRawComment(" NumAgprs: " + Twine(*NumAGPR), false);
+    OutStreamer->emitRawComment(" TotalNumVgprs: " + Twine(TotalNumVGPR),
+                                false);
+  }
   OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false);
   OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),
                               false);
@@ -474,6 +481,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunct
       SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()];
       emitCommonFunctionComments(
         Info.NumVGPR,
+        STM.hasMAIInsts() ? Info.NumAGPR : Optional<uint32_t>(),
+        Info.getTotalNumVGPRs(STM),
         Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
         Info.PrivateSegmentSize,
         getFunctionCodeSize(MF), MFI);
@@ -481,7 +490,11 @@ bool AMDGPUAsmPrinter::runOnMachineFunct
     }
 
     OutStreamer->emitRawComment(" Kernel info:", false);
-    emitCommonFunctionComments(CurrentProgramInfo.NumVGPR,
+    emitCommonFunctionComments(CurrentProgramInfo.NumArchVGPR,
+                               STM.hasMAIInsts()
+                                 ? CurrentProgramInfo.NumAccVGPR
+                                 : Optional<uint32_t>(),
+                               CurrentProgramInfo.NumVGPR,
                                CurrentProgramInfo.NumSGPR,
                                CurrentProgramInfo.ScratchSize,
                                getFunctionCodeSize(MF), MFI);
@@ -592,6 +605,11 @@ int32_t AMDGPUAsmPrinter::SIFunctionReso
                                                      UsesVCC, UsesFlatScratch);
 }
 
+int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumVGPRs(
+  const GCNSubtarget &ST) const {
+  return std::max(NumVGPR, NumAGPR);
+}
+
 AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
   const MachineFunction &MF) const {
   SIFunctionResourceInfo Info;
@@ -638,11 +656,18 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
         HighestVGPRReg = Reg;
         break;
       }
-      MCPhysReg AReg = AMDGPU::AGPR0 + TRI.getHWRegIndex(Reg);
-      if (MRI.isPhysRegUsed(AReg)) {
-        HighestVGPRReg = AReg;
-        break;
+    }
+
+    if (ST.hasMAIInsts()) {
+      MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
+      for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
+        if (MRI.isPhysRegUsed(Reg)) {
+          HighestAGPRReg = Reg;
+          break;
+        }
       }
+      Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister ? 0 :
+        TRI.getHWRegIndex(HighestAGPRReg) + 1;
     }
 
     MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
@@ -664,6 +689,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
   }
 
   int32_t MaxVGPR = -1;
+  int32_t MaxAGPR = -1;
   int32_t MaxSGPR = -1;
   uint64_t CalleeFrameSize = 0;
 
@@ -673,6 +699,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
       for (const MachineOperand &MO : MI.operands()) {
         unsigned Width = 0;
         bool IsSGPR = false;
+        bool IsAGPR = false;
 
         if (!MO.isReg())
           continue;
@@ -748,6 +775,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
           Width = 1;
         } else if (AMDGPU::AGPR_32RegClass.contains(Reg)) {
           IsSGPR = false;
+          IsAGPR = true;
           Width = 1;
         } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
           assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
@@ -759,6 +787,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
           Width = 2;
         } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
           IsSGPR = false;
+          IsAGPR = true;
           Width = 2;
         } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
           IsSGPR = false;
@@ -775,6 +804,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
           Width = 4;
         } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
           IsSGPR = false;
+          IsAGPR = true;
           Width = 4;
         } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
           assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
@@ -794,6 +824,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
           Width = 16;
         } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
           IsSGPR = false;
+          IsAGPR = true;
           Width = 16;
         } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
           IsSGPR = true;
@@ -803,6 +834,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
           Width = 32;
         } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
           IsSGPR = false;
+          IsAGPR = true;
           Width = 32;
         } else {
           llvm_unreachable("Unknown register class");
@@ -811,6 +843,8 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
         int MaxUsed = HWReg + Width - 1;
         if (IsSGPR) {
           MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
+        } else if (IsAGPR) {
+          MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
         } else {
           MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
         }
@@ -832,6 +866,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
             47 - IsaInfo::getNumExtraSGPRs(&ST, true, ST.hasFlatAddressSpace());
           MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
           MaxVGPR = std::max(MaxVGPR, 23);
+          MaxAGPR = std::max(MaxAGPR, 23);
 
           CalleeFrameSize = std::max(CalleeFrameSize, UINT64_C(16384));
           Info.UsesVCC = true;
@@ -856,6 +891,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
 
           MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
           MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
+          MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
           CalleeFrameSize
             = std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
           Info.UsesVCC |= I->second.UsesVCC;
@@ -872,6 +908,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
 
   Info.NumExplicitSGPR = MaxSGPR + 1;
   Info.NumVGPR = MaxVGPR + 1;
+  Info.NumAGPR = MaxAGPR + 1;
   Info.PrivateSegmentSize += CalleeFrameSize;
 
   return Info;
@@ -880,8 +917,11 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
 void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
                                         const MachineFunction &MF) {
   SIFunctionResourceInfo Info = analyzeResourceUsage(MF);
+  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
 
-  ProgInfo.NumVGPR = Info.NumVGPR;
+  ProgInfo.NumArchVGPR = Info.NumVGPR;
+  ProgInfo.NumAccVGPR = Info.NumAGPR;
+  ProgInfo.NumVGPR = Info.getTotalNumVGPRs(STM);
   ProgInfo.NumSGPR = Info.NumExplicitSGPR;
   ProgInfo.ScratchSize = Info.PrivateSegmentSize;
   ProgInfo.VCCUsed = Info.UsesVCC;
@@ -894,7 +934,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
     MF.getFunction().getContext().diagnose(DiagStackSize);
   }
 
-  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 
   // TODO(scott.linder): The calculations related to SGPR/VGPR blocks are

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h?rev=373411&r1=373410&r2=373411&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h Tue Oct  1 17:26:58 2019
@@ -43,6 +43,7 @@ private:
     // Track the number of explicitly used VGPRs. Special registers reserved at
     // the end are tracked separately.
     int32_t NumVGPR = 0;
+    int32_t NumAGPR = 0;
     int32_t NumExplicitSGPR = 0;
     uint64_t PrivateSegmentSize = 0;
     bool UsesVCC = false;
@@ -51,6 +52,7 @@ private:
     bool HasRecursion = false;
 
     int32_t getTotalNumSGPRs(const GCNSubtarget &ST) const;
+    int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const;
   };
 
   SIProgramInfo CurrentProgramInfo;
@@ -77,6 +79,8 @@ private:
   void EmitPALMetadata(const MachineFunction &MF,
                        const SIProgramInfo &KernelInfo);
   void emitCommonFunctionComments(uint32_t NumVGPR,
+                                  Optional<uint32_t> NumAGPR,
+                                  uint32_t TotalNumVGPR,
                                   uint32_t NumSGPR,
                                   uint64_t ScratchSize,
                                   uint64_t CodeSize,

Modified: llvm/trunk/lib/Target/AMDGPU/SIProgramInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIProgramInfo.h?rev=373411&r1=373410&r2=373411&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIProgramInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIProgramInfo.h Tue Oct  1 17:26:58 2019
@@ -41,6 +41,8 @@ struct SIProgramInfo {
     uint64_t ComputePGMRSrc2 = 0;
 
     uint32_t NumVGPR = 0;
+    uint32_t NumArchVGPR = 0;
+    uint32_t NumAccVGPR = 0;
     uint32_t NumSGPR = 0;
     uint32_t LDSSize = 0;
     bool FlatUsed = false;

Modified: llvm/trunk/test/CodeGen/AMDGPU/agpr-register-count.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/agpr-register-count.ll?rev=373411&r1=373410&r2=373411&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/agpr-register-count.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/agpr-register-count.ll Tue Oct  1 17:26:58 2019
@@ -1,15 +1,134 @@
-; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
 
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
+; GCN-LABEL: {{^}}kernel_32_agprs:
+; GCN:    .amdhsa_next_free_vgpr 32
+; GCN:    NumVgprs: 9
+; GCN:    NumAgprs: 32
+; GCN:    TotalNumVgprs: 32
+; GCN:    VGPRBlocks: 7
+; GCN:    NumVGPRsForWavesPerEU: 32
+; GCN:    Occupancy: 8
+define amdgpu_kernel void @kernel_32_agprs() {
+bb:
+  call void asm sideeffect "", "~{v8}" ()
+  call void asm sideeffect "", "~{a31}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}kernel_0_agprs:
+; GCN:    .amdhsa_next_free_vgpr 1
+; GCN:    NumVgprs: 1
+; GCN:    NumAgprs: 0
+; GCN:    TotalNumVgprs: 1
+; GCN:    VGPRBlocks: 0
+; GCN:    NumVGPRsForWavesPerEU: 1
+; GCN:    Occupancy: 10
+define amdgpu_kernel void @kernel_0_agprs() {
+bb:
+  call void asm sideeffect "", "~{v0}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}kernel_40_vgprs:
+; GCN:    .amdhsa_next_free_vgpr 40
+; GCN:    NumVgprs: 40
+; GCN:    NumAgprs: 16
+; GCN:    TotalNumVgprs: 40
+; GCN:    VGPRBlocks: 9
+; GCN:    NumVGPRsForWavesPerEU: 40
+; GCN:    Occupancy: 6
+define amdgpu_kernel void @kernel_40_vgprs() {
+bb:
+  call void asm sideeffect "", "~{v39}" ()
+  call void asm sideeffect "", "~{a15}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}func_32_agprs:
+; GCN:    NumVgprs: 9
+; GCN:    NumAgprs: 32
+; GCN:    TotalNumVgprs: 32
+define void @func_32_agprs() #0 {
+bb:
+  call void asm sideeffect "", "~{v8}" ()
+  call void asm sideeffect "", "~{a31}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}func_32_vgprs:
+; GCN:    NumVgprs: 32
+; GCN:    NumAgprs: 9
+; GCN:    TotalNumVgprs: 32
+define void @func_32_vgprs() {
+bb:
+  call void asm sideeffect "", "~{v31}" ()
+  call void asm sideeffect "", "~{a8}" ()
+  ret void
+}
 
-; GCN-LABEL: {{^}}test_32_agprs:
-; GCN: v_mfma_f32_32x32x1f32 a[0:31], {{v[0-9]+}}, {{v[0-9]+}},
-; GCN-NOT: v28
-; GCN: NumVgprs: 32
-; GCN: VGPRBlocks: 7
-define amdgpu_kernel void @test_32_agprs(<32 x float> addrspace(1)* %arg) {
+; GCN-LABEL: {{^}}func_0_agprs:
+; GCN:    NumVgprs: 1
+; GCN:    NumAgprs: 0
+; GCN:    TotalNumVgprs: 1
+define amdgpu_kernel void @func_0_agprs() {
 bb:
-  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> <float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, i32 0, i32 0, i32 0)
-  store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
+  call void asm sideeffect "", "~{v0}" ()
   ret void
 }
+
+; GCN-LABEL: {{^}}kernel_max_gprs:
+; GCN:    .amdhsa_next_free_vgpr 256
+; GCN:    NumVgprs: 256
+; GCN:    NumAgprs: 256
+; GCN:    TotalNumVgprs: 256
+; GCN:    VGPRBlocks: 63
+; GCN:    NumVGPRsForWavesPerEU: 256
+; GCN:    Occupancy: 1
+define amdgpu_kernel void @kernel_max_gprs() {
+bb:
+  call void asm sideeffect "", "~{v255}" ()
+  call void asm sideeffect "", "~{a255}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}kernel_call_func_32_agprs:
+; GCN:    .amdhsa_next_free_vgpr 32
+; GCN:    NumVgprs: 9
+; GCN:    NumAgprs: 32
+; GCN:    TotalNumVgprs: 32
+; GCN:    VGPRBlocks: 7
+; GCN:    NumVGPRsForWavesPerEU: 32
+; GCN:    Occupancy: 8
+define amdgpu_kernel void @kernel_call_func_32_agprs() {
+bb:
+  call void @func_32_agprs() #0
+  ret void
+}
+
+; GCN-LABEL: {{^}}func_call_func_32_agprs:
+; GCN:    NumVgprs: 9
+; GCN:    NumAgprs: 32
+; GCN:    TotalNumVgprs: 32
+define void @func_call_func_32_agprs() {
+bb:
+  call void @func_32_agprs() #0
+  ret void
+}
+
+declare void @undef_func()
+
+; GCN-LABEL: {{^}}kernel_call_undef_func:
+; GCN:    .amdhsa_next_free_vgpr 24
+; GCN:    NumVgprs: 24
+; GCN:    NumAgprs: 24
+; GCN:    TotalNumVgprs: 24
+; GCN:    VGPRBlocks: 5
+; GCN:    NumVGPRsForWavesPerEU: 24
+; GCN:    Occupancy: 10
+define amdgpu_kernel void @kernel_call_undef_func() {
+bb:
+  call void @undef_func()
+  ret void
+}
+
+attributes #0 = { nounwind noinline }




More information about the llvm-commits mailing list