[llvm] r289260 - AMDGPU/SI: Allow using SGPRs 96-101 on VI

Marek Olsak via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 9 11:49:41 PST 2016


Author: mareko
Date: Fri Dec  9 13:49:40 2016
New Revision: 289260

URL: http://llvm.org/viewvc/llvm-project?rev=289260&view=rev
Log:
AMDGPU/SI: Allow using SGPRs 96-101 on VI

Summary:
There is no point in setting SGPRS=104, because VI allocates SGPRs
in multiples of 16, so a request for 104 is rounded up to 112 anyway.
That enables us to use all 102 addressable SGPRs for general purposes.

Reviewers: tstellarAMD

Subscribers: qcolombet, arsenm, kzhuravl, wdng, nhaehnle, yaxunl, tony-tye

Differential Revision: https://reviews.llvm.org/D27149

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp
    llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
    llvm/trunk/test/CodeGen/AMDGPU/exceed-max-sgprs.ll
    llvm/trunk/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=289260&r1=289259&r2=289260&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Fri Dec  9 13:49:40 2016
@@ -489,6 +489,22 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
       RI->getHWRegIndex(MFI->getScratchRSrcReg());
   }
 
+  // Check the addressable register limit before we add ExtraSGPRs.
+  if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
+      !STM.hasSGPRInitBug()) {
+    unsigned MaxAddressableNumSGPRs = STM.getMaxNumSGPRs();
+    if (MaxSGPR + 1 > MaxAddressableNumSGPRs) {
+      // This can happen due to a compiler bug or when using inline asm.
+      LLVMContext &Ctx = MF.getFunction()->getContext();
+      DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
+                                       "addressable scalar registers",
+                                       MaxSGPR + 1, DS_Error,
+                                       DK_ResourceLimit, MaxAddressableNumSGPRs);
+      Ctx.diagnose(Diag);
+      MaxSGPR = MaxAddressableNumSGPRs - 1;
+    }
+  }
+
   // Account for extra SGPRs and VGPRs reserved for debugger use.
   MaxSGPR += ExtraSGPRs;
   MaxVGPR += RI->getNumDebuggerReservedVGPRs(STM);
@@ -505,19 +521,22 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
   ProgInfo.NumVGPRsForWavesPerEU = std::max(
     ProgInfo.NumVGPR, RI->getMinNumVGPRs(MFI->getMaxWavesPerEU()));
 
-  unsigned MaxNumSGPRs = STM.getMaxNumSGPRs();
-  if (ProgInfo.NumSGPR > MaxNumSGPRs) {
-    // This can happen due to a compiler bug or when using inline asm to use the
-    // registers which are usually reserved for vcc etc.
-
-    LLVMContext &Ctx = MF.getFunction()->getContext();
-    DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
-                                     "scalar registers",
-                                     ProgInfo.NumSGPR, DS_Error,
-                                     DK_ResourceLimit, MaxNumSGPRs);
-    Ctx.diagnose(Diag);
-    ProgInfo.NumSGPR = MaxNumSGPRs;
-    ProgInfo.NumSGPRsForWavesPerEU = MaxNumSGPRs;
+  if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||
+      STM.hasSGPRInitBug()) {
+    unsigned MaxNumSGPRs = STM.getMaxNumSGPRs();
+    if (ProgInfo.NumSGPR > MaxNumSGPRs) {
+      // This can happen due to a compiler bug or when using inline asm to use the
+      // registers which are usually reserved for vcc etc.
+
+      LLVMContext &Ctx = MF.getFunction()->getContext();
+      DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
+                                       "scalar registers",
+                                       ProgInfo.NumSGPR, DS_Error,
+                                       DK_ResourceLimit, MaxNumSGPRs);
+      Ctx.diagnose(Diag);
+      ProgInfo.NumSGPR = MaxNumSGPRs;
+      ProgInfo.NumSGPRsForWavesPerEU = MaxNumSGPRs;
+    }
   }
 
   if (STM.hasSGPRInitBug()) {

Modified: llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp?rev=289260&r1=289259&r2=289260&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp Fri Dec  9 13:49:40 2016
@@ -144,7 +144,7 @@ void GCNMaxOccupancySchedStrategy::pickN
   unsigned VGPRExcessLimit =
       Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
   unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF);
-  unsigned SGPRCriticalLimit = SRI->getMaxNumSGPRs(ST, MaxWaves);
+  unsigned SGPRCriticalLimit = SRI->getMaxNumSGPRs(ST, MaxWaves, true);
   unsigned VGPRCriticalLimit = SRI->getMaxNumVGPRs(MaxWaves);
 
   ReadyQueue &Q = Zone.Available;

Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=289260&r1=289259&r2=289260&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Fri Dec  9 13:49:40 2016
@@ -1211,14 +1211,15 @@ unsigned SIRegisterInfo::getMinNumSGPRs(
 }
 
 unsigned SIRegisterInfo::getMaxNumSGPRs(const SISubtarget &ST,
-                                        unsigned WavesPerEU) const {
+                                        unsigned WavesPerEU,
+                                        bool Addressable) const {
   if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
     switch (WavesPerEU) {
       case 0:  return 80;
       case 10: return 80;
       case 9:  return 80;
       case 8:  return 96;
-      default: return getNumAddressableSGPRs(ST);
+      default: return Addressable ? getNumAddressableSGPRs(ST) : 112;
     }
   } else {
     switch (WavesPerEU) {
@@ -1243,7 +1244,8 @@ unsigned SIRegisterInfo::getMaxNumSGPRs(
   // Compute maximum number of SGPRs function can use using default/requested
   // minimum number of waves per execution unit.
   std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
-  unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first);
+  unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, false);
+  unsigned MaxNumAddressableSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, true);
 
   // Check if maximum number of SGPRs was explicitly requested using
   // "amdgpu-num-sgpr" attribute.
@@ -1268,7 +1270,7 @@ unsigned SIRegisterInfo::getMaxNumSGPRs(
 
     // Make sure requested value is compatible with values implied by
     // default/requested minimum/maximum number of waves per execution unit.
-    if (Requested && Requested > getMaxNumSGPRs(ST, WavesPerEU.first))
+    if (Requested && Requested > getMaxNumSGPRs(ST, WavesPerEU.first, false))
       Requested = 0;
     if (WavesPerEU.second &&
         Requested && Requested < getMinNumSGPRs(ST, WavesPerEU.second))
@@ -1281,7 +1283,7 @@ unsigned SIRegisterInfo::getMaxNumSGPRs(
   if (ST.hasSGPRInitBug())
     MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
 
-  return MaxNumSGPRs - getNumReservedSGPRs(ST);
+  return std::min(MaxNumSGPRs - getNumReservedSGPRs(ST), MaxNumAddressableSGPRs);
 }
 
 unsigned SIRegisterInfo::getNumDebuggerReservedVGPRs(

Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h?rev=289260&r1=289259&r2=289260&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h Fri Dec  9 13:49:40 2016
@@ -206,7 +206,8 @@ public:
 
   /// \returns Maximum number of SGPRs that meets given number of waves per
   /// execution unit requirement for given subtarget.
-  unsigned getMaxNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU) const;
+  unsigned getMaxNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU,
+                          bool Addressable) const;
 
   /// \returns Maximum number of SGPRs that meets number of waves per execution
   /// unit requirement for function \p MF, or number of SGPRs explicitly

Modified: llvm/trunk/test/CodeGen/AMDGPU/exceed-max-sgprs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/exceed-max-sgprs.ll?rev=289260&r1=289259&r2=289260&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/exceed-max-sgprs.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/exceed-max-sgprs.ll Fri Dec  9 13:49:40 2016
@@ -76,7 +76,7 @@ define void @use_too_many_sgprs_iceland(
   ret void
 }
 
-; ERROR: error: scalar registers limit of 102 exceeded (103) in use_too_many_sgprs_fiji
+; ERROR: error: addressable scalar registers limit of 102 exceeded (103) in use_too_many_sgprs_fiji
 define void @use_too_many_sgprs_fiji() #3 {
   call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
   call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()

Modified: llvm/trunk/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll?rev=289260&r1=289259&r2=289260&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll Fri Dec  9 13:49:40 2016
@@ -3,8 +3,9 @@
 
 ; Make sure this doesn't crash.
 ; ALL-LABEL: {{^}}test:
-; ALL: s_mov_b32 s92, SCRATCH_RSRC_DWORD0
-; ALL: s_mov_b32 s91, s3
+; ALL: s_mov_b32 s[[LO:[0-9]+]], SCRATCH_RSRC_DWORD0
+; ALL: s_mov_b32 s[[OFF:[0-9]+]], s3
+; ALL: s_mov_b32 s[[HI:[0-9]+]], 0xe80000
 
 ; Make sure we are handling hazards correctly.
 ; SGPR: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:12
@@ -15,11 +16,11 @@
 
 ; Make sure scratch wave offset register is correctly incremented and
 ; then restored.
-; SMEM: s_mov_b32 m0, s91{{$}}
-; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[92:95], m0 ; 16-byte Folded Spill
+; SMEM: s_mov_b32 m0, s[[OFF]]{{$}}
+; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[LO]]:[[HI]]], m0 ; 16-byte Folded Spill
 
-; SMEM: s_mov_b32 m0, s91{{$}}
-; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[92:95], m0 ; 16-byte Folded Reload
+; SMEM: s_mov_b32 m0, s[[OFF]]{{$}}
+; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[LO]]:[[HI]]], m0 ; 16-byte Folded Reload
 
 ; SMEM: s_dcache_wb
 ; ALL: s_endpgm




More information about the llvm-commits mailing list