[llvm] 728b878 - [AMDGPU] Set the CostPerUse value for vgpr registers.
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 19 23:23:46 PDT 2020
Author: cdevadas
Date: 2020-03-20T11:49:35+05:30
New Revision: 728b878de689e4921ca7f864ed3036f9b2c53853
URL: https://github.com/llvm/llvm-project/commit/728b878de689e4921ca7f864ed3036f9b2c53853
DIFF: https://github.com/llvm/llvm-project/commit/728b878de689e4921ca7f864ed3036f9b2c53853.diff
LOG: [AMDGPU] Set the CostPerUse value for vgpr registers.
For all VGPRs other than the argument registers (v0-v31), set the
CostPerUse value to the ratio reg_index/allocation_granularity.
This is a preparatory commit for introducing the scratch registers
in the ABI. This change should help achieve a more balanced
register allocation.
Differential Revision: https://reviews.llvm.org/D76417
Added:
Modified:
llvm/lib/Target/AMDGPU/SIRegisterInfo.td
llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index fb789d05ce9a..7794170765fb 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -216,11 +216,16 @@ foreach Index = 0-105 in {
// VGPR registers
foreach Index = 0-255 in {
+ // Set a cost value for vgprs other than the argument registers (v0-v31).
+ // The ratio of index/allocation_granularity is taken as the cost value.
+ // Considered the allocation granularity as 4 here.
+ let CostPerUse=!if(!gt(Index, 31), !srl(Index, 2), 0) in {
def VGPR#Index :
SIReg <"v"#Index, Index>,
DwarfRegNum<[!add(Index, 2560)]> {
let HWEncoding{8} = 1;
}
+ }
}
// AccVGPR registers
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
index 78e2885c523a..9fe431ea486f 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
@@ -246,7 +246,7 @@ define amdgpu_kernel void @max_256_vgprs_spill_9x32(<32 x float> addrspace(1)* %
; GFX908-DAG v_accvgpr_read_b32
; GCN: NumVgprs: 256
-; GFX900: ScratchSize: 644
+; GFX900: ScratchSize: 708
; GFX908-FIXME: ScratchSize: 0
; GCN: VGPRBlocks: 63
; GCN: NumVGPRsForWavesPerEU: 256
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
index d9327368ac82..07a29adc6393 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
@@ -24,7 +24,7 @@
; OFFREG is offset system SGPR
; GCN: buffer_store_dword {{v[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Spill
; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Reload
-; GCN: NumVgprs: 256
+; GCN: NumVgprs: 255
; GCN: ScratchSize: 1536
define amdgpu_vs void @main([9 x <4 x i32>] addrspace(4)* inreg %arg, [17 x <4 x i32>] addrspace(4)* inreg %arg1, [17 x <4 x i32>] addrspace(4)* inreg %arg2, [34 x <8 x i32>] addrspace(4)* inreg %arg3, [16 x <4 x i32>] addrspace(4)* inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
More information about the llvm-commits
mailing list