[llvm] r332523 - AMDGPU: Recalculate SGPRs when trap handler is supported

Konstantin Zhuravlyov via llvm-commits llvm-commits at lists.llvm.org
Wed May 16 13:47:48 PDT 2018


Author: kzhuravl
Date: Wed May 16 13:47:48 2018
New Revision: 332523

URL: http://llvm.org/viewvc/llvm-project?rev=332523&view=rev
Log:
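AMDGPU: Recalculate SGPRs when trap handler is supported

When the trap handler feature is enabled, getMinNumSGPRs and getMaxNumSGPRs
now subtract TRAP_NUM_SGPRS (16) from the per-wave SGPR count before aligning
it down to the SGPR allocation granule.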

Differential Revision: https://reviews.llvm.org/D29911

Added:
    llvm/trunk/test/CodeGen/AMDGPU/amdhsa-trap-num-sgprs.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=332523&r1=332522&r2=332523&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Wed May 16 13:47:48 2018
@@ -358,9 +358,11 @@ unsigned getMinNumSGPRs(const FeatureBit
 
   if (WavesPerEU >= getMaxWavesPerEU(Features))
     return 0;
-  unsigned MinNumSGPRs =
-      alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
-                getSGPRAllocGranule(Features)) + 1;
+
+  unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
+  if (Features.test(FeatureTrapHandler))
+    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
+  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
   return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
 }
 
@@ -369,11 +371,13 @@ unsigned getMaxNumSGPRs(const FeatureBit
   assert(WavesPerEU != 0);
 
   IsaVersion Version = getIsaVersion(Features);
-  unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
-                                   getSGPRAllocGranule(Features));
   unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
   if (Version.Major >= 8 && !Addressable)
     AddressableNumSGPRs = 112;
+  unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
+  if (Features.test(FeatureTrapHandler))
+    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
+  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
   return std::min(MaxNumSGPRs, AddressableNumSGPRs);
 }
 

Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h?rev=332523&r1=332522&r2=332523&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Wed May 16 13:47:48 2018
@@ -42,7 +42,8 @@ namespace IsaInfo {
 enum {
   // The closed Vulkan driver sets 96, which limits the wave count to 8 but
   // doesn't spill SGPRs as much as when 80 is set.
-  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96
+  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
+  TRAP_NUM_SGPRS = 16
 };
 
 /// Instruction set architecture version.

Added: llvm/trunk/test/CodeGen/AMDGPU/amdhsa-trap-num-sgprs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdhsa-trap-num-sgprs.ll?rev=332523&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/amdhsa-trap-num-sgprs.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/amdhsa-trap-num-sgprs.ll Wed May 16 13:47:48 2018
@@ -0,0 +1,70 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+trap-handler < %s | FileCheck %s --check-prefixes=GCN,TRAP-HANDLER-ENABLE
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=-trap-handler < %s | FileCheck %s --check-prefixes=GCN,TRAP-HANDLER-DISABLE
+
+; GCN-LABEL: {{^}}amdhsa_trap_num_sgprs
+; TRAP-HANDLER-ENABLE:  NumSgprs: 60
+; TRAP-HANDLER-DISABLE: NumSgprs: 76
+define amdgpu_kernel void @amdhsa_trap_num_sgprs(
+    i32 addrspace(1)* %out0, i32 %in0,
+    i32 addrspace(1)* %out1, i32 %in1,
+    i32 addrspace(1)* %out2, i32 %in2,
+    i32 addrspace(1)* %out3, i32 %in3,
+    i32 addrspace(1)* %out4, i32 %in4,
+    i32 addrspace(1)* %out5, i32 %in5,
+    i32 addrspace(1)* %out6, i32 %in6,
+    i32 addrspace(1)* %out7, i32 %in7,
+    i32 addrspace(1)* %out8, i32 %in8,
+    i32 addrspace(1)* %out9, i32 %in9,
+    i32 addrspace(1)* %out10, i32 %in10,
+    i32 addrspace(1)* %out11, i32 %in11,
+    i32 addrspace(1)* %out12, i32 %in12,
+    i32 addrspace(1)* %out13, i32 %in13,
+    i32 addrspace(1)* %out14, i32 %in14,
+    i32 addrspace(1)* %out15, i32 %in15,
+    i32 addrspace(1)* %out16, i32 %in16,
+    i32 addrspace(1)* %out17, i32 %in17,
+    i32 addrspace(1)* %out18, i32 %in18,
+    i32 addrspace(1)* %out19, i32 %in19,
+    i32 addrspace(1)* %out20, i32 %in20,
+    i32 addrspace(1)* %out21, i32 %in21,
+    i32 addrspace(1)* %out22, i32 %in22,
+    i32 addrspace(1)* %out23, i32 %in23,
+    i32 addrspace(1)* %out24, i32 %in24,
+    i32 addrspace(1)* %out25, i32 %in25,
+    i32 addrspace(1)* %out26, i32 %in26,
+    i32 addrspace(1)* %out27, i32 %in27,
+    i32 addrspace(1)* %out28, i32 %in28,
+    i32 addrspace(1)* %out29, i32 %in29) {
+entry:
+  store i32 %in0, i32 addrspace(1)* %out0
+  store i32 %in1, i32 addrspace(1)* %out1
+  store i32 %in2, i32 addrspace(1)* %out2
+  store i32 %in3, i32 addrspace(1)* %out3
+  store i32 %in4, i32 addrspace(1)* %out4
+  store i32 %in5, i32 addrspace(1)* %out5
+  store i32 %in6, i32 addrspace(1)* %out6
+  store i32 %in7, i32 addrspace(1)* %out7
+  store i32 %in8, i32 addrspace(1)* %out8
+  store i32 %in9, i32 addrspace(1)* %out9
+  store i32 %in10, i32 addrspace(1)* %out10
+  store i32 %in11, i32 addrspace(1)* %out11
+  store i32 %in12, i32 addrspace(1)* %out12
+  store i32 %in13, i32 addrspace(1)* %out13
+  store i32 %in14, i32 addrspace(1)* %out14
+  store i32 %in15, i32 addrspace(1)* %out15
+  store i32 %in16, i32 addrspace(1)* %out16
+  store i32 %in17, i32 addrspace(1)* %out17
+  store i32 %in18, i32 addrspace(1)* %out18
+  store i32 %in19, i32 addrspace(1)* %out19
+  store i32 %in20, i32 addrspace(1)* %out20
+  store i32 %in21, i32 addrspace(1)* %out21
+  store i32 %in22, i32 addrspace(1)* %out22
+  store i32 %in23, i32 addrspace(1)* %out23
+  store i32 %in24, i32 addrspace(1)* %out24
+  store i32 %in25, i32 addrspace(1)* %out25
+  store i32 %in26, i32 addrspace(1)* %out26
+  store i32 %in27, i32 addrspace(1)* %out27
+  store i32 %in28, i32 addrspace(1)* %out28
+  store i32 %in29, i32 addrspace(1)* %out29
+  ret void
+}




More information about the llvm-commits mailing list